diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 0b18c6b0e923a..47e32c4864809 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -91,25 +91,17 @@ void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
       GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
       if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
         S32S64LaneMask.insert(MI.getOperand(3).getReg());
-        findLCSSAPhi(MI.getOperand(0).getReg());
+        S32S64LaneMask.insert(MI.getOperand(0).getReg());
       }
 
       if (MI.getOpcode() == AMDGPU::SI_IF ||
           MI.getOpcode() == AMDGPU::SI_ELSE) {
-        findLCSSAPhi(MI.getOperand(0).getReg());
+        S32S64LaneMask.insert(MI.getOperand(0).getReg());
       }
     }
   }
 }
 
-void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
-  S32S64LaneMask.insert(Reg);
-  for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
-    if (LCSSAPhi.isPHI())
-      S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
-  }
-}
-
 static LLT getReadAnyLaneSplitTy(LLT Ty) {
   if (Ty.isVector()) {
     LLT ElTy = Ty.getElementType();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 27f8fed86d647..70cfdacec700c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -47,8 +47,6 @@ class IntrinsicLaneMaskAnalyzer {
 
 private:
   void initLaneMaskIntrinsics(MachineFunction &MF);
-  // This will not be needed when we turn off LCSSA for global-isel.
-  void findLCSSAPhi(Register Reg);
 };
 
 void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 8d3e7829e10e1..eb2ece7bece51 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -312,6 +312,12 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
     }
 
     // Opcodes that also support S1.
+    if (Opc == G_FREEZE &&
+        MRI.getType(MI->getOperand(0).getReg()) != LLT::scalar(1)) {
+      RBLHelper.applyMappingTrivial(*MI);
+      continue;
+    }
+
     if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
          Opc == AMDGPU::G_IMPLICIT_DEF)) {
       Register Dst = MI->getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 3c007987b8494..3383175fc1bdb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -134,6 +134,26 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
   switch (Mapping.LoweringMethod) {
   case DoNotLower:
     return;
+  case VccExtToSel: {
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    Register Src = MI.getOperand(1).getReg();
+    unsigned Opc = MI.getOpcode();
+    if (Ty == S32 || Ty == S16) {
+      auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1);
+      auto False = B.buildConstant({VgprRB, Ty}, 0);
+      B.buildSelect(MI.getOperand(0).getReg(), Src, True, False);
+    }
+    if (Ty == S64) {
+      auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1);
+      auto False = B.buildConstant({VgprRB, S32}, 0);
+      auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False);
+      B.buildMergeValues(
+          MI.getOperand(0).getReg(),
+          {Sel.getReg(0), Opc == G_SEXT ?
+               Sel.getReg(0) : False.getReg(0)});
+    }
+    MI.eraseFromParent();
+    return;
+  }
   case UniExtToSel: {
     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
     auto True = B.buildConstant({SgprRB, Ty},
@@ -276,6 +296,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr64:
   case Vgpr64:
     return LLT::scalar(64);
+  case VgprP0:
+    return LLT::pointer(0, 64);
   case SgprP1:
   case VgprP1:
     return LLT::pointer(1, 64);
@@ -383,6 +405,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
     return SgprRB;
   case Vgpr32:
   case Vgpr64:
+  case VgprP0:
   case VgprP1:
   case VgprP3:
   case VgprP4:
@@ -425,6 +448,7 @@ void RegBankLegalizeHelper::applyMappingDst(
   case SgprV4S32:
   case Vgpr32:
   case Vgpr64:
+  case VgprP0:
   case VgprP1:
   case VgprP3:
   case VgprP4:
@@ -555,6 +579,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
   // vgpr scalars, pointers and vectors
   case Vgpr32:
   case Vgpr64:
+  case VgprP0:
   case VgprP1:
   case VgprP3:
   case VgprP4:
@@ -653,7 +678,8 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
   // We accept all types that can fit in some register class.
   // Uniform G_PHIs have all sgpr registers.
   // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
-  if (Ty == LLT::scalar(32) || Ty == LLT::pointer(4, 64)) {
+  if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
+      Ty == LLT::pointer(4, 64)) {
     return;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index f293b3aba7b79..eb021d50641ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg) == LLT::scalar(32);
   case S64:
     return MRI.getType(Reg) == LLT::scalar(64);
+  case P0:
+    return MRI.getType(Reg) == LLT::pointer(0, 64);
   case P1:
     return MRI.getType(Reg) == LLT::pointer(1, 64);
   case P3:
@@ -58,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg) == LLT::pointer(4, 64);
   case P5:
     return MRI.getType(Reg) == LLT::pointer(5, 32);
+  case V4S32:
+    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
   case B32:
     return MRI.getType(Reg).getSizeInBits() == 32;
   case B64:
@@ -315,6 +319,7 @@ RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
       Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
     unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
     if (!IRulesAlias.contains(IntrID)) {
+      MI.dump();
       LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
       llvm_unreachable("No rules defined for intrinsic opcode");
     }
@@ -322,6 +327,7 @@
   }
 
   if (!GRulesAlias.contains(Opc)) {
+    MI.dump();
     LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
     llvm_unreachable("No rules defined for generic opcode");
   }
@@ -431,16 +437,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
       .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
       .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
+      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
+      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
 
   addRulesForGOpcs({G_SHL}, Standard)
+      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
       .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
       .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
 
   // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
   // and G_FREEZE here, rest is trivially regbankselected
earlier + addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}}); addRulesForGOpcs({G_CONSTANT}) .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}}); + addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}}); addRulesForGOpcs({G_ICMP}) .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}}) @@ -471,6 +482,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_ZEXT, G_SEXT}) .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) + .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}}) .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}}) .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}}); @@ -528,6 +540,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}}) .Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}}) .Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}}) + .Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}) .Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}}) .Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}}) @@ -556,15 +569,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, // clang-format on addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector) + .Div(S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}) + .Uni(S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}) .Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}) .Uni(V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}); addRulesForGOpcs({G_STORE}) + .Any({{S32, P0}, {{}, {Vgpr32, VgprP0}}}) .Any({{S32, P1}, {{}, {Vgpr32, VgprP1}}}) .Any({{S64, P1}, {{}, {Vgpr64, VgprP1}}}) .Any({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}}); - addRulesForGOpcs({G_PTR_ADD}).Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}}); + addRulesForGOpcs({G_AMDGPU_BUFFER_STORE}) + .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}}); + + addRulesForGOpcs({G_PTR_ADD}) + .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}}) + .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}}); + + addRulesForGOpcs({G_INTTOPTR}).Any({{UniP4}, {{SgprP4}, {Sgpr64}}}); addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); @@ -585,10 +608,18 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, using namespace Intrinsic; + addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}}); + // This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir. addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}}); addRulesForIOpcs({amdgcn_if_break}, Standard) .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}}); + addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard) + .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}}); + + addRulesForIOpcs({amdgcn_readfirstlane}) + .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}}); + } // end initialize rules diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 8280751e1dbdd..d454c0f342d2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -50,6 +50,7 @@ enum UniformityLLTOpPredicateID { DivS64, // pointers + P0, P1, P3, P4, @@ -124,6 +125,7 @@ enum RegBankLLTMappingApplyID { // vgpr scalars, pointers, vectors and B-types Vgpr32, Vgpr64, + VgprP0, VgprP1, VgprP3, VgprP4, @@ -162,6 +164,7 @@ enum RegBankLLTMappingApplyID { // vgpr. 
Lower it to two S32 vgpr ANDs. enum LoweringMethodID { DoNotLower, + VccExtToSel, UniExtToSel, VgprToVccCopy, SplitTo32, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 96062b30fc012..0c80c7ea744e2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1374,7 +1374,11 @@ bool GCNPassConfig::addPreISel() { // control flow modifications. addPass(createAMDGPURewriteUndefForPHILegacyPass()); - addPass(createLCSSAPass()); + // SDAG requires LCSSA, GlobalISel does not. Disable LCSSA for -global-isel + // with -new-reg-bank-select and without any of the fallback options. + if (!getCGPassBuilderOption().EnableGlobalISelOption || + !isGlobalISelAbortEnabled() || !NewRegBankSelect) + addPass(createLCSSAPass()); if (TM->getOptLevel() > CodeGenOptLevel::Less) addPass(&AMDGPUPerfHintAnalysisLegacyID); @@ -2072,7 +2076,9 @@ void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { // control flow modifications. addPass(AMDGPURewriteUndefForPHIPass()); - addPass(LCSSAPass()); + if (!getCGPassBuilderOption().EnableGlobalISelOption || + !isGlobalISelAbortEnabled() || !NewRegBankSelect) + addPass(LCSSAPass()); if (TM.getOptLevel() > CodeGenOptLevel::Less) addPass(AMDGPUPerfHintAnalysisPass(TM)); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll index c5ded11c7d323..65c96a3db5bbf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; Divergent phis that don't require lowering using lane mask merging @@ -101,27 +101,23 @@ define void @divergent_i1_phi_used_inside_loop(float %val, ptr %addr) { ; GFX10-LABEL: divergent_i1_phi_used_inside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 -; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s5, 1 +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: .LBB2_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_xor_b32_e32 v3, 1, v3 -; GFX10-NEXT: v_cvt_f32_u32_e32 v5, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 -; GFX10-NEXT: v_and_b32_e32 v6, 1, v3 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v6 -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s6, s6, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s6 +; GFX10-NEXT: s_xor_b32 s5, s5, 1 +; GFX10-NEXT: s_add_i32 s6, s6, 1 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0 +; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: ; %exit -; GFX10-NEXT: 
s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_cmp_lg_u32 s5, 0 +; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4 ; GFX10-NEXT: flat_store_dword v[1:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -147,29 +143,25 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa ; GFX10-LABEL: divergent_i1_phi_used_inside_loop_bigger_loop_body: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 +; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 1.0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x3e8 -; GFX10-NEXT: v_mov_b32_e32 v8, s4 -; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: s_mov_b32 s5, 0 +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: s_branch .LBB3_2 ; GFX10-NEXT: .LBB3_1: ; %loop_body ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v9, v8 -; GFX10-NEXT: s_xor_b32 s5, s5, -1 -; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v9, v0 -; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s7, exec_lo, s5 -; GFX10-NEXT: s_or_b32 s6, s6, s7 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_cvt_f32_u32_e32 v8, s6 +; GFX10-NEXT: s_xor_b32 s4, s4, exec_lo +; GFX10-NEXT: s_add_i32 s6, s6, 1 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v0 +; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: .LBB3_2: ; %loop_start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: s_cmpk_le_i32 s6, 0x3e8 ; GFX10-NEXT: s_mov_b32 s7, 1 -; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8 -; GFX10-NEXT: s_cbranch_vccz .LBB3_4 +; GFX10-NEXT: s_cbranch_scc0 .LBB3_4 ; GFX10-NEXT: ; %bb.3: ; %else ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX10-NEXT: s_mov_b32 s7, 0 @@ -177,7 +169,6 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa ; GFX10-NEXT: .LBB3_4: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX10-NEXT: s_xor_b32 s7, s7, 1 -; GFX10-NEXT: s_and_b32 s7, s7, 1 ; GFX10-NEXT: s_cmp_lg_u32 s7, 0 ; GFX10-NEXT: s_cbranch_scc1 .LBB3_1 ; GFX10-NEXT: ; %bb.5: ; %if @@ -185,8 +176,8 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa ; GFX10-NEXT: flat_store_dword v[4:5], v1 ; GFX10-NEXT: s_branch .LBB3_1 ; GFX10-NEXT: .LBB3_6: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -234,45 +225,47 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3 ; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0 ; GFX10-NEXT: s_or_b64 s[12:13], s[4:5], s[0:1] -; GFX10-NEXT: s_mov_b32 s3, -1 ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[12:13], 0x0 ; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1 -; GFX10-NEXT: v_xor_b32_e32 v3, 1, v1 -; GFX10-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX10-NEXT: v_and_b32_e32 v3, 1, v1 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX10-NEXT: ; implicit-def: $vgpr3 +; GFX10-NEXT: s_xor_b32 s3, vcc_lo, exec_lo 
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_load_dword v2, v2, s[4:7], 0 offen +; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s3 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2 ; GFX10-NEXT: s_cbranch_vccnz .LBB4_4 ; GFX10-NEXT: ; %bb.1: ; %.preheader.preheader -; GFX10-NEXT: v_mov_b32_e32 v3, s1 -; GFX10-NEXT: v_mov_b32_e32 v4, s1 +; GFX10-NEXT: s_mov_b32 s3, 0 ; GFX10-NEXT: .LBB4_2: ; %.preheader ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: buffer_load_dword v5, v3, s[4:7], 0 offen +; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v3, 4, v3 +; GFX10-NEXT: s_add_i32 s1, s1, 4 +; GFX10-NEXT: buffer_load_dword v3, v3, s[4:7], 0 offen ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v4, v5, v4 +; GFX10-NEXT: v_readfirstlane_b32 s12, v3 +; GFX10-NEXT: s_add_i32 s3, s12, s3 ; GFX10-NEXT: s_cbranch_vccnz .LBB4_2 ; GFX10-NEXT: ; %bb.3: ; %.preheader._crit_edge -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2 -; GFX10-NEXT: s_mov_b32 s3, 0 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s3, v2 ; GFX10-NEXT: s_or_b32 s1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s1 -; GFX10-NEXT: .LBB4_4: ; %Flow -; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1 +; GFX10-NEXT: s_branch .LBB4_6 +; GFX10-NEXT: .LBB4_4: +; GFX10-NEXT: s_mov_b32 s1, exec_lo +; GFX10-NEXT: ; implicit-def: $vgpr1 +; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s1 ; GFX10-NEXT: s_cbranch_vccz .LBB4_6 ; GFX10-NEXT: ; %bb.5: ; %.19 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX10-NEXT: v_or_b32_e32 v3, 2, v1 +; GFX10-NEXT: v_or_b32_e32 v1, 2, v1 ; GFX10-NEXT: .LBB4_6: ; %.22 ; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, 2 -; GFX10-NEXT: buffer_store_dword v3, v0, s[8:11], 0 offen +; GFX10-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen ; GFX10-NEXT: s_endpgm .entry: %.0 = call i64 @llvm.amdgcn.s.getpc() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir index 6594d7f504212..6ce2f9b7a2c77 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir @@ -6,7 +6,7 @@ define void @divergent_i1_phi_uniform_branch_simple() {ret void} define void @divergent_i1_phi_used_inside_loop() {ret void} define void @divergent_i1_phi_used_inside_loop_bigger_loop_body() {ret void} - define void @_amdgpu_cs_main() #0 {ret void} + define void @single_lane_execution_attribute() #0 {ret void} attributes #0 = {"amdgpu-flat-work-group-size"="1,1"} ... 
@@ -60,10 +60,10 @@ body: | ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C5]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C4]] + ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) @@ -105,9 +105,9 @@ body: | G_BR %bb.2 bb.4: - %16:_(s32) = G_CONSTANT i32 2 - %17:_(s32) = G_CONSTANT i32 1 - %18:_(s32) = G_SELECT %13(s1), %17, %16 + %16:_(s32) = G_SEXT %13(s1) + %17:_(s32) = G_CONSTANT i32 2 + %18:_(s32) = G_ADD %16, %17 G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1) S_ENDPGM 0 ... @@ -149,10 +149,10 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x30000000), %bb.2(0x50000000) @@ -178,9 +178,9 @@ body: | bb.2: %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 - %12:_(s32) = G_CONSTANT i32 2 - %13:_(s32) = G_CONSTANT i32 1 - %14:_(s32) = G_SELECT %11(s1), %13, %12 + %12:_(s32) = G_SEXT %11(s1) + %13:_(s32) = G_CONSTANT i32 2 + %14:_(s32) = G_ADD %12, %13 G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) S_ENDPGM 0 ... 
@@ -199,39 +199,30 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32) ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -242,15 +233,15 @@ body: | %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32) - %4:_(s1) = G_CONSTANT i1 true - 
%5:_(s32) = G_CONSTANT i32 0 + %4:_(s32) = G_CONSTANT i32 0 + %5:_(s1) = G_CONSTANT i1 true bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 - %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1 - %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1 + %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0 + %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 + %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1 %12:_(s1) = G_CONSTANT i1 true %11:_(s1) = G_XOR %10, %12 %13:_(s32) = G_UITOFP %8(s32) @@ -262,13 +253,11 @@ body: | G_BR %bb.2 bb.2: - %16:_(s1) = G_PHI %11(s1), %bb.1 - %17:_(s32) = G_PHI %7(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) - %18:_(s32) = G_FCONSTANT float 0.000000e+00 - %19:_(s32) = G_FCONSTANT float 1.000000e+00 - %20:_(s32) = G_SELECT %16(s1), %19, %18 - G_STORE %20(s32), %3(p0) :: (store (s32)) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32) + %16:_(s32) = G_FCONSTANT float 0.000000e+00 + %17:_(s32) = G_FCONSTANT float 1.000000e+00 + %18:_(s32) = G_SELECT %11(s1), %17, %16 + G_STORE %18(s32), %3(p0) :: (store (s32)) SI_RETURN ... @@ -293,33 +282,30 @@ body: | ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %39(s1), %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %37(s1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C3]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1 ; 
GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]] ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5 ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -333,36 +319,30 @@ body: | ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: G_STORE [[C7]](s32), [[MV2]](p0) :: (store (s32)) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 + ; GFX10-NEXT: G_STORE [[C6]](s32), [[MV2]](p0) :: (store (s32)) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY10]], [[C8]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY9]], [[C8]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR1]](s1), [[C11]], [[C10]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -380,26 +360,26 @@ body: | %8:_(s32) = COPY $vgpr6 %9:_(s32) = COPY $vgpr7 %10:_(p0) = G_MERGE_VALUES %8(s32), %9(s32) - %11:_(s32) = G_CONSTANT i32 0 - %12:_(s32) = G_FCONSTANT float 1.000000e+00 - %13:_(s1) = G_FCMP floatpred(ogt), %1(s32), %12 + %11:_(s32) = G_FCONSTANT float 1.000000e+00 + %12:_(s1) = G_FCMP floatpred(ogt), %1(s32), %11 + %13:_(s32) = 
G_CONSTANT i32 0 bb.1: successors: %bb.4(0x40000000), %bb.2(0x40000000) - %14:_(s32) = G_PHI %15(s32), %bb.5, %11(s32), %bb.0 - %16:_(s32) = G_PHI %11(s32), %bb.0, %17(s32), %bb.5 - %18:_(s1) = G_PHI %13(s1), %bb.0, %19(s1), %bb.5 - %20:_(s1) = G_CONSTANT i1 true - %21:_(s32) = G_CONSTANT i32 1000 - %22:_(s1) = G_ICMP intpred(sle), %16(s32), %21 - G_BRCOND %22(s1), %bb.4 + %14:_(s32) = G_PHI %15(s32), %bb.5, %13(s32), %bb.0 + %16:_(s32) = G_PHI %13(s32), %bb.0, %17(s32), %bb.5 + %18:_(s1) = G_PHI %12(s1), %bb.0, %19(s1), %bb.5 + %20:_(s32) = G_CONSTANT i32 1000 + %21:_(s1) = G_ICMP intpred(sle), %16(s32), %20 + %22:_(s1) = G_CONSTANT i1 true + G_BRCOND %21(s1), %bb.4 G_BR %bb.2 bb.2: successors: %bb.3(0x40000000), %bb.5(0x40000000) - %23:_(s1) = G_PHI %24(s1), %bb.4, %20(s1), %bb.1 + %23:_(s1) = G_PHI %24(s1), %bb.4, %22(s1), %bb.1 %25:_(s1) = G_CONSTANT i1 true %26:_(s1) = G_XOR %23, %25 G_BRCOND %26(s1), %bb.5 @@ -415,9 +395,9 @@ body: | bb.4: successors: %bb.2(0x80000000) - %24:_(s1) = G_CONSTANT i1 false %28:_(s32) = G_CONSTANT i32 1000 G_STORE %28(s32), %10(p0) :: (store (s32)) + %24:_(s1) = G_CONSTANT i1 false G_BR %bb.2 bb.5: @@ -434,22 +414,20 @@ body: | G_BR %bb.6 bb.6: - %33:_(s1) = G_PHI %19(s1), %bb.5 - %34:_(s32) = G_PHI %15(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32) - %35:_(s32) = G_FCONSTANT float 0.000000e+00 - %36:_(s32) = G_FCONSTANT float 1.000000e+00 - %37:_(s32) = G_SELECT %33(s1), %36, %35 - G_STORE %37(s32), %4(p0) :: (store (s32)) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) + %33:_(s32) = G_FCONSTANT float 0.000000e+00 + %34:_(s32) = G_FCONSTANT float 1.000000e+00 + %35:_(s32) = G_SELECT %19(s1), %34, %33 + G_STORE %35(s32), %4(p0) :: (store (s32)) SI_RETURN ... 
--- -name: _amdgpu_cs_main +name: single_lane_execution_attribute legalized: true tracksRegLiveness: true body: | - ; GFX10-LABEL: name: _amdgpu_cs_main + ; GFX10-LABEL: name: single_lane_execution_attribute ; GFX10: bb.0: ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 @@ -464,24 +442,22 @@ body: | ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR]](s64) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (load (<8 x s32>)) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[BITCAST]](s256) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](s128) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (invariant load (<8 x s32>), align 16, addrspace 4) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C2]](s32), [[C1]](s32) - ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C2]](s32), [[INT1]](s32) - ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C4]](s32), [[C1]](s32) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C4]](s32), [[INT1]](s32) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]] ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FREEZE]], [[C3]](s32) - ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (load (s32), align 1, addrspace 8) + ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[UV]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_BUFFER_LOAD]](s32), [[C1]] - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C4]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C2]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC1]], [[C5]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C5]] ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 @@ -495,8 +471,8 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %58(s1), %bb.4 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %56(s1), 
%bb.4 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %29(s32), %bb.4, [[DEF]](s32), %bb.0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: G_BRCOND [[COPY4]](s1), %bb.5 ; GFX10-NEXT: G_BR %bb.6 @@ -504,11 +480,11 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %34(s32), %bb.3, [[C6]](s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %36(s32), %bb.3, [[FREEZE]](s32), %bb.1 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %38(s32), %bb.3, [[C6]](s32), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %33(s32), %bb.3, [[C6]](s32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.3, [[FREEZE]](s32), %bb.1 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %37(s32), %bb.3, [[C6]](s32), %bb.1 ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (load (s32), align 1, addrspace 8) + ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[UV]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AMDGPU_BUFFER_LOAD1]], [[PHI4]] ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C8]] @@ -521,11 +497,10 @@ body: | ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.3 - ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PHI5]](s32), [[AMDGPU_BUFFER_LOAD]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[AMDGPU_BUFFER_LOAD]] ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s1) + ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C10]](s1) ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -537,13 +512,13 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[C11]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5 - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY1]] ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD3]], [[C12]](s32) ; GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI6]](s32), [[UV1]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (store (s32), align 1, addrspace 8) + ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI5]](s32), [[UV3]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (dereferenceable store (s32), align 1, addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -559,82 +534,79 @@ body: | %7:_(s64) = G_ZEXT %0(s32) %8:_(s64) = G_OR %6, %7 %9:_(p4) = G_INTTOPTR %8(s64) - %10:_(<8 x s32>) = 
G_LOAD %9(p4) :: (load (<8 x s32>)) - %11:_(s256) = G_BITCAST %10(<8 x s32>) - %12:_(s128) = G_TRUNC %11(s256) - %13:_(<4 x s32>) = G_BITCAST %12(s128) - %15:_(s32) = G_CONSTANT i32 0 - %14:_(s32) = G_CONSTANT i32 -1 - %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %14(s32), %15(s32) - %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %14(s32), %16(s32) - %18:_(s32) = G_FREEZE %17 - %19:_(s32) = G_CONSTANT i32 2 - %20:_(s32) = G_SHL %18, %19(s32) - %21:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %15(s32), %20, %15, 0, 0, 0 :: (load (s32), align 1, addrspace 8) - %22:_(s1) = G_ICMP intpred(eq), %21(s32), %15 - %23:_(s32) = G_CONSTANT i32 1 - %24:_(s32) = G_AND %18, %23 - %25:_(s1) = G_TRUNC %24(s32) - %26:_(s1) = G_CONSTANT i1 true - %27:_(s1) = G_XOR %25, %26 - G_BRCOND %27(s1), %bb.2 + %10:_(<8 x s32>) = G_LOAD %9(p4) :: (invariant load (<8 x s32>), align 16, addrspace 4) + %11:_(s32) = G_CONSTANT i32 0 + %12:_(s32) = G_CONSTANT i32 1 + %13:_(s32) = G_CONSTANT i32 2 + %14:_(<4 x s32>), %15:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>) + %16:_(s32) = G_CONSTANT i32 -1 + %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %16(s32), %11(s32) + %18:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %16(s32), %17(s32) + %19:_(s32) = G_FREEZE %18 + %20:_(s32) = G_SHL %19, %13(s32) + %21:_(s32) = G_AMDGPU_BUFFER_LOAD %14(<4 x s32>), %11(s32), %20, %11, 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) + %22:_(s1) = G_ICMP intpred(eq), %21(s32), %11 + %23:_(s32) = G_AND %19, %12 + %24:_(s1) = G_TRUNC %23(s32) + %25:_(s1) = G_CONSTANT i1 true + %26:_(s1) = G_XOR %24, %25 + G_BRCOND %26(s1), %bb.2 G_BR %bb.1 bb.1: successors: %bb.3(0x80000000) - %28:_(s32) = G_CONSTANT i32 0 + %27:_(s32) = G_CONSTANT i32 0 G_BR %bb.3 bb.2: successors: %bb.5(0x40000000), %bb.6(0x40000000) - %29:_(s32) = G_PHI %30(s32), %bb.4, %3(s32), %bb.0 - %31:_(s1) = G_PHI %32(s1), %bb.4, %26(s1), %bb.0 - G_BRCOND %31(s1), %bb.5 + %28:_(s32) = G_PHI %29(s32), %bb.4, %3(s32), %bb.0 + %30:_(s1) = G_PHI %31(s1), %bb.4, %25(s1), %bb.0 + G_BRCOND %30(s1), %bb.5 G_BR %bb.6 bb.3: successors: %bb.4(0x04000000), %bb.3(0x7c000000) - %33:_(s32) = G_PHI %34(s32), %bb.3, %28(s32), %bb.1 - %35:_(s32) = G_PHI %36(s32), %bb.3, %18(s32), %bb.1 - %37:_(s32) = G_PHI %38(s32), %bb.3, %28(s32), %bb.1 - %39:_(s32) = G_CONSTANT i32 0 - %40:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %39(s32), %33, %39, 0, 0, 0 :: (load (s32), align 1, addrspace 8) - %38:_(s32) = G_ADD %40, %37 - %41:_(s32) = G_CONSTANT i32 -1 - %36:_(s32) = G_ADD %35, %41 - %42:_(s32) = G_CONSTANT i32 4 - %34:_(s32) = G_ADD %33, %42 - %43:_(s1) = G_ICMP intpred(ne), %36(s32), %39 - G_BRCOND %43(s1), %bb.3 + %32:_(s32) = G_PHI %33(s32), %bb.3, %27(s32), %bb.1 + %34:_(s32) = G_PHI %35(s32), %bb.3, %19(s32), %bb.1 + %36:_(s32) = G_PHI %37(s32), %bb.3, %27(s32), %bb.1 + %38:_(s32) = G_CONSTANT i32 0 + %39:_(s32) = G_AMDGPU_BUFFER_LOAD %14(<4 x s32>), %38(s32), %32, %38, 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8) + %37:_(s32) = G_ADD %39, %36 + %40:_(s32) = G_CONSTANT i32 -1 + %35:_(s32) = G_ADD %34, %40 + %41:_(s32) = G_CONSTANT i32 4 + %33:_(s32) = G_ADD %32, %41 + %42:_(s1) = G_ICMP intpred(ne), %35(s32), %38 + G_BRCOND %42(s1), %bb.3 G_BR %bb.4 bb.4: successors: %bb.2(0x80000000) - %44:_(s32) = G_PHI %38(s32), %bb.3 - %32:_(s1) = G_CONSTANT i1 false - %45:_(s1) = G_ICMP intpred(eq), %44(s32), %21 - %46:_(s1) = G_OR %22, %45 - %30:_(s32) = G_ZEXT %46(s1) + %43:_(s1) = G_ICMP intpred(eq), %37(s32), %21 + %44:_(s1) = G_OR 
%22, %43 + %29:_(s32) = G_ZEXT %44(s1) + %31:_(s1) = G_CONSTANT i1 false G_BR %bb.2 bb.5: successors: %bb.6(0x80000000) - %47:_(s32) = G_ZEXT %22(s1) - %48:_(s32) = G_CONSTANT i32 2 - %49:_(s32) = G_OR %47, %48 + %45:_(s32) = G_ZEXT %22(s1) + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s32) = G_OR %45, %46 bb.6: - %50:_(s32) = G_PHI %29(s32), %bb.2, %49(s32), %bb.5 - %51:_(<4 x s32>), %52:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>) - %53:_(s32) = G_ADD %2, %1 - %54:_(s32) = G_CONSTANT i32 2 - %55:_(s32) = G_SHL %53, %54(s32) - %56:_(s32) = G_CONSTANT i32 0 - G_AMDGPU_BUFFER_STORE %50(s32), %52(<4 x s32>), %56(s32), %55, %56, 0, 0, 0 :: (store (s32), align 1, addrspace 8) + %48:_(s32) = G_PHI %28(s32), %bb.2, %47(s32), %bb.5 + %49:_(<4 x s32>), %50:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>) + %51:_(s32) = G_ADD %2, %1 + %52:_(s32) = G_CONSTANT i32 2 + %53:_(s32) = G_SHL %51, %52(s32) + %54:_(s32) = G_CONSTANT i32 0 + G_AMDGPU_BUFFER_STORE %48(s32), %50(<4 x s32>), %54(s32), %53, %54, 0, 0, 0 :: (dereferenceable store (s32), align 1, addrspace 8) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index 935200d595307..14d370dd9663f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; This file contains various tests that have divergent i1s used outside of ; the loop. 
These are lane masks is sgpr and need to have correct value in @@ -14,31 +14,28 @@ define void @divergent_i1_phi_used_outside_loop(float %val, float %pre.cond.val, ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v1 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: s_andn2_b32 s5, s4, exec_lo +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo ; GFX10-NEXT: s_or_b32 s6, s5, s6 -; GFX10-NEXT: ; implicit-def: $sgpr5 +; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: .LBB0_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v1 -; GFX10-NEXT: s_xor_b32 s7, s6, -1 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s5 +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_mov_b32 s7, s6 +; GFX10-NEXT: s_add_i32 s5, s5, 1 +; GFX10-NEXT: s_xor_b32 s6, s6, s8 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0 ; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_andn2_b32 s8, s6, exec_lo -; GFX10-NEXT: s_and_b32 s7, exec_lo, s7 -; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: s_andn2_b32 s8, s7, exec_lo ; GFX10-NEXT: s_and_b32 s6, exec_lo, s6 -; GFX10-NEXT: s_or_b32 s7, s8, s7 -; GFX10-NEXT: s_or_b32 s5, s5, s6 -; GFX10-NEXT: s_mov_b32 s6, s7 +; GFX10-NEXT: s_or_b32 s6, s8, s6 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB0_1 ; GFX10-NEXT: ; %bb.2: ; %exit ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -65,43 +62,44 @@ define void @divergent_i1_phi_used_outside_loop_larger_loop_body(float %val, ptr ; GFX10-LABEL: divergent_i1_phi_used_outside_loop_larger_loop_body: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, -1 -; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_andn2_b32 s5, s4, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, -1 -; GFX10-NEXT: s_or_b32 s4, s5, s4 +; GFX10-NEXT: s_and_b32 s6, exec_lo, exec_lo +; GFX10-NEXT: s_mov_b32 s4, -1 +; GFX10-NEXT: s_or_b32 s7, s5, s6 +; GFX10-NEXT: ; implicit-def: $sgpr5 ; GFX10-NEXT: s_branch .LBB1_2 ; GFX10-NEXT: .LBB1_1: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_add_co_u32 v1, s4, v1, 4 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s4, 0, v2, s4 -; GFX10-NEXT: s_andn2_b32 s7, s5, exec_lo -; GFX10-NEXT: s_and_b32 s8, exec_lo, s6 -; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0 -; GFX10-NEXT: s_or_b32 s4, s7, s8 -; GFX10-NEXT: s_cbranch_vccz .LBB1_4 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; GFX10-NEXT: s_add_i32 s4, s4, 1 +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, 4 +; GFX10-NEXT: s_cmp_ge_i32 s4, 10 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo +; GFX10-NEXT: s_cselect_b32 s8, 1, 0 +; GFX10-NEXT: s_andn2_b32 s7, s6, exec_lo +; GFX10-NEXT: s_and_b32 s9, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s7, s7, s9 +; GFX10-NEXT: s_cmp_lg_u32 s8, 0 +; GFX10-NEXT: s_cbranch_scc0 .LBB1_4 ; GFX10-NEXT: .LBB1_2: ; %loop.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: s_mov_b32 s5, s4 -; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo -; 
GFX10-NEXT: s_and_b32 s6, exec_lo, s5 -; GFX10-NEXT: s_or_b32 s6, s4, s6 -; GFX10-NEXT: s_and_saveexec_b32 s4, s5 +; GFX10-NEXT: s_mov_b32 s6, s7 +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: s_and_b32 s7, exec_lo, s7 +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: s_and_saveexec_b32 s7, s6 ; GFX10-NEXT: s_cbranch_execz .LBB1_1 ; GFX10-NEXT: ; %bb.3: ; %is.eq.zero ; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GFX10-NEXT: global_load_dword v5, v[1:2], off -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo +; GFX10-NEXT: global_load_dword v0, v[1:2], off +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 -; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s6, s6, s7 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: s_and_b32 s8, exec_lo, vcc_lo +; GFX10-NEXT: s_or_b32 s5, s5, s8 ; GFX10-NEXT: s_branch .LBB1_1 ; GFX10-NEXT: .LBB1_4: ; %exit -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 ; GFX10-NEXT: flat_store_dword v[3:4], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -137,25 +135,21 @@ define void @divergent_i1_xor_used_outside_loop(float %val, float %pre.cond.val, ; GFX10-LABEL: divergent_i1_xor_used_outside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 -; GFX10-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 1.0, v1 +; GFX10-NEXT: s_mov_b32 s5, 0 +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: .LBB2_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v1 -; GFX10-NEXT: s_xor_b32 s5, s5, -1 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 -; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s7, exec_lo, s5 -; GFX10-NEXT: s_or_b32 s6, s6, s7 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s6 +; GFX10-NEXT: s_add_i32 s6, s6, 1 +; GFX10-NEXT: s_xor_b32 s4, s4, exec_lo +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0 +; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -191,63 +185,61 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts, ; GFX10-LABEL: divergent_i1_xor_used_outside_loop_larger_loop_body: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, -1 +; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader -; GFX10-NEXT: v_mov_b32_e32 v5, s5 -; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: ; implicit-def: $sgpr7 -; GFX10-NEXT: ; implicit-def: $sgpr8 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: ; implicit-def: $sgpr9 +; 
GFX10-NEXT: ; implicit-def: $sgpr10 ; GFX10-NEXT: s_branch .LBB3_3 ; GFX10-NEXT: .LBB3_2: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 -; GFX10-NEXT: s_xor_b32 s9, s8, -1 -; GFX10-NEXT: s_and_b32 s10, exec_lo, s7 -; GFX10-NEXT: s_or_b32 s5, s10, s5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s9, exec_lo, s9 -; GFX10-NEXT: s_or_b32 s6, s6, s9 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_xor_b32 s5, s10, exec_lo +; GFX10-NEXT: s_and_b32 s11, exec_lo, s9 +; GFX10-NEXT: s_or_b32 s8, s11, s8 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execz .LBB3_5 ; GFX10-NEXT: .LBB3_3: ; %loop.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo -; GFX10-NEXT: s_and_b32 s9, exec_lo, -1 -; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo -; GFX10-NEXT: s_or_b32 s8, s8, s9 -; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6] -; GFX10-NEXT: s_or_b32 s7, s7, s9 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v1, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v2, v7, vcc_lo -; GFX10-NEXT: global_load_dword v6, v[6:7], off +; GFX10-NEXT: s_ashr_i32 s5, s4, 31 +; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo +; GFX10-NEXT: s_lshl_b64 s[12:13], s[4:5], 2 +; GFX10-NEXT: s_andn2_b32 s5, s10, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v5, s12 +; GFX10-NEXT: v_mov_b32_e32 v6, s13 +; GFX10-NEXT: s_and_b32 s10, exec_lo, exec_lo +; GFX10-NEXT: s_and_b32 s11, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s10, s5, s10 +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v1, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: s_or_b32 s9, s9, s11 +; GFX10-NEXT: global_load_dword v5, v[5:6], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX10-NEXT: s_and_saveexec_b32 s9, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5 +; GFX10-NEXT: s_and_saveexec_b32 s5, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB3_2 ; GFX10-NEXT: ; %bb.4: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v5 -; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo -; GFX10-NEXT: s_and_b32 s10, exec_lo, 0 -; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo -; GFX10-NEXT: v_mov_b32_e32 v5, v6 -; GFX10-NEXT: s_and_b32 s11, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s8, s8, s10 -; GFX10-NEXT: s_or_b32 s7, s7, s11 +; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, s4, v0 +; GFX10-NEXT: s_andn2_b32 s10, s10, exec_lo +; GFX10-NEXT: s_and_b32 s11, exec_lo, 0 +; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo +; GFX10-NEXT: s_add_i32 s4, s4, 1 +; GFX10-NEXT: s_and_b32 s12, exec_lo, vcc_lo +; GFX10-NEXT: s_or_b32 s10, s10, s11 +; GFX10-NEXT: s_or_b32 s9, s9, s12 ; GFX10-NEXT: s_branch .LBB3_2 ; GFX10-NEXT: .LBB3_5: ; %loop.exit.guard -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, s6 -; GFX10-NEXT: s_or_b32 s6, s5, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo +; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s6, s4, s5 ; GFX10-NEXT: .LBB3_6: ; %Flow1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; GFX10-NEXT: s_and_saveexec_b32 s4, s6 ; GFX10-NEXT: s_cbranch_execz .LBB3_8 ; GFX10-NEXT: ; %bb.7: ; %block.after.loop @@ -297,23 +289,19 @@ define void 
@divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: ; implicit-def: $sgpr6 ; GFX10-NEXT: v_mov_b32_e32 v4, s5 ; GFX10-NEXT: s_branch .LBB4_2 ; GFX10-NEXT: .LBB4_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: s_and_b32 s4, exec_lo, s7 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; GFX10-NEXT: s_and_b32 s4, exec_lo, s6 ; GFX10-NEXT: s_or_b32 s5, s4, s5 -; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s6, s4, s6 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 ; GFX10-NEXT: s_cbranch_execz .LBB4_6 ; GFX10-NEXT: .LBB4_2: ; %cond.block.0 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4 -; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo +; GFX10-NEXT: s_and_saveexec_b32 s6, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_4 ; GFX10-NEXT: ; %bb.3: ; %if.block.0 ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 @@ -325,21 +313,21 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: .LBB4_4: ; %loop.break.block ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4 -; GFX10-NEXT: s_mov_b32 s7, -1 -; GFX10-NEXT: s_and_saveexec_b32 s8, s4 +; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_and_saveexec_b32 s7, s4 ; GFX10-NEXT: s_cbranch_execz .LBB4_1 ; GFX10-NEXT: ; %bb.5: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 ; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 -; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo -; GFX10-NEXT: s_and_b32 s7, exec_lo, 0 -; GFX10-NEXT: s_or_b32 s7, s4, s7 +; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo +; GFX10-NEXT: s_and_b32 s6, exec_lo, 0 +; GFX10-NEXT: s_or_b32 s6, s4, s6 ; GFX10-NEXT: s_branch .LBB4_1 ; GFX10-NEXT: .LBB4_6: ; %cond.block.1 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: s_and_saveexec_b32 s4, s6 +; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_8 ; GFX10-NEXT: ; %bb.7: ; %if.block.1 ; GFX10-NEXT: global_store_dword v[6:7], v4, off @@ -403,49 +391,48 @@ exit: define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspace(1) %a, ptr %addr) { ; GFX10-LABEL: divergent_i1_freeze_used_outside_loop: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s1, exec_lo +; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_mov_b32 s3, -1 -; GFX10-NEXT: v_mov_b32_e32 v5, s0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: ; implicit-def: $sgpr2 +; GFX10-NEXT: ; implicit-def: $sgpr3 ; GFX10-NEXT: s_branch .LBB5_2 ; GFX10-NEXT: .LBB5_1: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v5 -; GFX10-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s2 +; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, s0, v0 +; GFX10-NEXT: s_add_i32 s0, s0, 1 +; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_or_b32 s3, s3, s4 +; GFX10-NEXT: s_and_b32 s4, exec_lo, s3 ; GFX10-NEXT: s_or_b32 s1, s1, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: 
s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execz .LBB5_4 ; GFX10-NEXT: .LBB5_2: ; %loop.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s3 -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s3 +; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo +; GFX10-NEXT: s_and_b32 s4, exec_lo, s1 +; GFX10-NEXT: s_or_b32 s3, s3, s4 +; GFX10-NEXT: s_and_saveexec_b32 s4, s1 ; GFX10-NEXT: s_cbranch_execz .LBB5_1 ; GFX10-NEXT: ; %bb.3: ; %is.eq.zero ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6] -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v1, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v2, v7, vcc_lo -; GFX10-NEXT: global_load_dword v6, v[6:7], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s3, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v5, s6 +; GFX10-NEXT: v_mov_b32_e32 v6, s7 +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v1, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: global_load_dword v5, v[5:6], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 ; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s2, s2, s3 -; GFX10-NEXT: ; implicit-def: $sgpr3 +; GFX10-NEXT: s_or_b32 s3, s1, s3 +; GFX10-NEXT: ; implicit-def: $sgpr1 ; GFX10-NEXT: s_branch .LBB5_1 ; GFX10-NEXT: .LBB5_4: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s3 ; GFX10-NEXT: flat_store_dword v[3:4], v0 ; GFX10-NEXT: s_endpgm entry: @@ -479,60 +466,63 @@ exit: define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) { ; GFX10-LABEL: loop_with_1break: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: ; implicit-def: $sgpr2 -; GFX10-NEXT: ; implicit-def: $sgpr3 -; GFX10-NEXT: v_mov_b32_e32 v6, s0 +; GFX10-NEXT: ; implicit-def: $sgpr5 +; GFX10-NEXT: ; implicit-def: $sgpr6 ; GFX10-NEXT: s_branch .LBB6_2 ; GFX10-NEXT: .LBB6_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: s_and_b32 s4, exec_lo, s2 -; GFX10-NEXT: s_or_b32 s0, s4, s0 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s3 -; GFX10-NEXT: s_or_b32 s1, s1, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB6_4 ; GFX10-NEXT: .LBB6_2: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6 -; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, -1 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo -; GFX10-NEXT: global_load_dword v9, v[9:10], off +; GFX10-NEXT: 
s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_mov_b32 s7, exec_lo +; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s6, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_and_b32 s6, exec_lo, s7 +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: s_and_b32 s7, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s6, s1, s6 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: global_load_dword v6, v[6:7], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB6_1 ; GFX10-NEXT: ; %bb.3: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6 -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 -; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: global_load_dword v9, v[7:8], off -; GFX10-NEXT: s_and_b32 s5, exec_lo, 0 -; GFX10-NEXT: v_mov_b32_e32 v6, v10 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s3, s3, s5 -; GFX10-NEXT: s_or_b32 s2, s2, s6 +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_add_i32 s2, s0, 1 +; GFX10-NEXT: s_cmpk_lt_u32 s0, 0x64 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo +; GFX10-NEXT: s_andn2_b32 s3, s6, exec_lo +; GFX10-NEXT: s_and_b32 s6, exec_lo, 0 +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: global_load_dword v8, v[6:7], off +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_or_b32 s6, s3, s6 +; GFX10-NEXT: s_or_b32 s5, s5, s0 +; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9 -; GFX10-NEXT: global_store_dword v[7:8], v9, off +; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 +; GFX10-NEXT: global_store_dword v[6:7], v8, off ; GFX10-NEXT: s_branch .LBB6_1 ; GFX10-NEXT: .LBB6_4: ; %loop.exit.guard -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-NEXT: s_and_saveexec_b32 s0, s1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_and_saveexec_b32 s0, s6 ; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX10-NEXT: s_cbranch_execz .LBB6_6 ; GFX10-NEXT: ; %bb.5: ; %break.body diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir index 5bbe3e4886899..2299191d88b76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir @@ -16,52 +16,43 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: 
[[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]] ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %36(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %22(s1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: 
[[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_2]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -73,16 +64,16 @@ body: | %2:_(s32) = COPY $vgpr2 %3:_(s32) = COPY $vgpr3 %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 0 - %6:_(s32) = G_FCONSTANT float 1.000000e+00 - %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6 + %5:_(s32) = G_FCONSTANT float 1.000000e+00 + %6:_(s1) = G_FCMP floatpred(ogt), %1(s32), %5 + %7:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0 - %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1 - %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1 + %8:_(s32) = G_PHI %9(s32), %bb.1, %7(s32), %bb.0 + %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.1 + %12:_(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.1 %14:_(s1) = G_CONSTANT i1 true %13:_(s1) = G_XOR %12, %14 %15:_(s32) = G_UITOFP %10(s32) @@ -94,13 +85,11 @@ body: | G_BR %bb.2 bb.2: - %18:_(s1) = G_PHI %12(s1), %bb.1 - %19:_(s32) = G_PHI %9(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32) - %20:_(s32) = G_FCONSTANT float 0.000000e+00 - %21:_(s32) = G_FCONSTANT float 1.000000e+00 - %22:_(s32) = G_SELECT %18(s1), %21, %20 - G_STORE %22(s32), %4(p0) :: (store (s32)) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32) + %18:_(s32) = G_FCONSTANT float 0.000000e+00 + %19:_(s32) = G_FCONSTANT float 1.000000e+00 + %20:_(s32) = G_SELECT %12(s1), %19, %18 + G_STORE %20(s32), %4(p0) :: (store (s32)) SI_RETURN ... 
@@ -120,9 +109,9 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc @@ -133,9 +122,9 @@ body: | ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %27(s1), %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %26(s1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.3 ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1) @@ -175,15 +164,13 @@ body: | ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1 ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C7]], [[C6]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C7]], [[C6]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -196,15 +183,15 @@ body: | %3:_(s32) = COPY $vgpr3 %4:_(s32) = COPY $vgpr4 %5:_(p0) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_CONSTANT i32 -1 - %7:_(s1) = G_CONSTANT i1 true + %6:_(s1) = G_CONSTANT i1 true + %7:_(s32) = G_CONSTANT i32 -1 bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %8:_(s32) = G_PHI %6(s32), %bb.0, %9(s32), %bb.3 + %8:_(s32) = G_PHI %7(s32), %bb.0, %9(s32), %bb.3 %10:_(p1) = G_PHI %2(p1), %bb.0, %11(p1), %bb.3 - %12:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.3 + %12:sreg_32_xm0_xexec(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.3 %14:sreg_32_xm0_xexec(s32) = SI_IF %12(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 @@ -230,11 +217,10 @@ body: | G_BR %bb.4 bb.4: - %22:_(s1) = 
G_PHI %12(s1), %bb.3 - %23:_(s32) = G_FCONSTANT float 0.000000e+00 - %24:_(s32) = G_FCONSTANT float 1.000000e+00 - %25:_(s32) = G_SELECT %22(s1), %24, %23 - G_STORE %25(s32), %5(p0) :: (store (s32)) + %22:_(s32) = G_FCONSTANT float 0.000000e+00 + %23:_(s32) = G_FCONSTANT float 1.000000e+00 + %24:_(s32) = G_SELECT %12(s1), %23, %22 + G_STORE %24(s32), %5(p0) :: (store (s32)) SI_RETURN ... @@ -253,43 +239,34 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %27(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %22(s1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY5]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: 
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -301,16 +278,16 @@ body: | %2:_(s32) = COPY $vgpr2 %3:_(s32) = COPY $vgpr3 %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32) - %5:_(s32) = G_CONSTANT i32 0 - %6:_(s32) = G_FCONSTANT float 1.000000e+00 - %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6 + %5:_(s32) = G_FCONSTANT float 1.000000e+00 + %6:_(s1) = G_FCMP floatpred(ogt), %1(s32), %5 + %7:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0 - %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1 - %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1 + %8:_(s32) = G_PHI %9(s32), %bb.1, %7(s32), %bb.0 + %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.1 + %12:_(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.1 %14:_(s1) = G_CONSTANT i1 true %13:_(s1) = G_XOR %12, %14 %15:_(s32) = G_UITOFP %10(s32) @@ -322,13 +299,11 @@ body: | G_BR %bb.2 bb.2: - %18:_(s1) = G_PHI %13(s1), %bb.1 - %19:_(s32) = G_PHI %9(s32), %bb.1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32) - %20:_(s32) = G_FCONSTANT float 0.000000e+00 - %21:_(s32) = G_FCONSTANT float 1.000000e+00 - %22:_(s32) = G_SELECT %18(s1), %21, %20 - G_STORE %22(s32), %4(p0) :: (store (s32)) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32) + %18:_(s32) = G_FCONSTANT float 0.000000e+00 + %19:_(s32) = G_FCONSTANT float 1.000000e+00 + %20:_(s32) = G_SELECT %13(s1), %19, %18 + G_STORE %20(s32), %4(p0) :: (store (s32)) SI_RETURN ... 
@@ -364,13 +339,12 @@ body: | ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %40(s1), %bb.8 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %38(s1), %bb.8 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec @@ -379,49 +353,47 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %73(s1), %bb.7 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %62(s1), %bb.7 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %49(s1), %bb.7 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7 - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %60(s1), %bb.7 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %47(s1), %bb.7 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C4]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, 
implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]] - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C6]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} @@ -438,32 +410,26 @@ body: | ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, 
[[S_OR_B32_2]](s1), %bb.4 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY18]], [[C9]] - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI4]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]] + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR]](s1) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI3]](s32) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY20]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) + ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -502,24 +468,24 @@ body: | %16:_(s32) = G_PHI %12(s32), %bb.1, %17(s32), %bb.7 %18:_(s32) = G_PHI %19(s32), %bb.7, %12(s32), %bb.1 - %20:_(s1) = G_CONSTANT i1 true - %21:_(s64) = G_SEXT %18(s32) - %22:_(s32) = G_CONSTANT i32 2 - %23:_(s64) = G_SHL %21, %22(s32) - %24:_(p1) = G_PTR_ADD %3, %23(s64) - %25:_(s32) = G_LOAD %24(p1) :: (load (s32), addrspace 1) - %26:_(s32) = G_CONSTANT i32 0 - %27:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %25(s32), %26 - 
%28:sreg_32_xm0_xexec(s32) = SI_IF %27(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + %20:_(s64) = G_SEXT %18(s32) + %21:_(s32) = G_CONSTANT i32 2 + %22:_(s64) = G_SHL %20, %21(s32) + %23:_(p1) = G_PTR_ADD %3, %22(s64) + %24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1) + %25:_(s32) = G_CONSTANT i32 0 + %26:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %24(s32), %25 + %27:_(s1) = G_CONSTANT i1 true + %28:sreg_32_xm0_xexec(s32) = SI_IF %26(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.4: successors: %bb.7(0x80000000) - %29:_(s1) = G_CONSTANT i1 false - %30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %18, %30 - %32:_(s1) = G_ICMP intpred(slt), %18(s32), %0 + %29:_(s32) = G_CONSTANT i32 1 + %30:_(s32) = G_ADD %18, %29 + %31:_(s1) = G_ICMP intpred(slt), %18(s32), %0 + %32:_(s1) = G_CONSTANT i1 false G_BR %bb.7 bb.5: @@ -535,12 +501,12 @@ body: | bb.7: successors: %bb.8(0x04000000), %bb.3(0x7c000000) - %19:_(s32) = G_PHI %31(s32), %bb.4, %7(s32), %bb.3 - %34:_(s1) = G_PHI %29(s1), %bb.4, %20(s1), %bb.3 - %35:_(s1) = G_PHI %32(s1), %bb.4, %20(s1), %bb.3 + %19:_(s32) = G_PHI %30(s32), %bb.4, %7(s32), %bb.3 + %34:_(s1) = G_PHI %32(s1), %bb.4, %27(s1), %bb.3 + %35:_(s1) = G_PHI %31(s1), %bb.4, %27(s1), %bb.3 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s32) %36:_(s1) = G_CONSTANT i1 true - %37:_(s1) = G_XOR %34, %36 + %14:_(s1) = G_XOR %34, %36 %17:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %35(s1), %16(s32) SI_LOOP %17(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.8 @@ -548,9 +514,7 @@ body: | bb.8: successors: %bb.2(0x80000000) - %14:_(s1) = G_PHI %37(s1), %bb.7 - %38:_(s32) = G_PHI %17(s32), %bb.7 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) G_BR %bb.2 ... 
@@ -572,85 +536,75 @@ body: | ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %39(s1), %bb.6 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI1]] ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) - ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[COPY6]](s1) ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]] - ; 
GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY9]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.6 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY12]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) + ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: ; GFX10-NEXT: successors: %bb.9(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: G_STORE [[PHI6]](s32), [[MV1]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[MV1]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.9: ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) @@ -667,14 +621,14 @@ body: | %5:_(s32) = COPY $vgpr6 
%6:_(s32) = COPY $vgpr7 %7:_(p1) = G_MERGE_VALUES %5(s32), %6(s32) - %8:_(s32) = G_CONSTANT i32 0 - %9:_(s32) = G_IMPLICIT_DEF + %8:_(s32) = G_IMPLICIT_DEF + %9:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x80000000) - %10:_(s32) = G_PHI %11(s32), %bb.6, %8(s32), %bb.0 - %12:_(s32) = G_PHI %8(s32), %bb.0, %13(s32), %bb.6 + %10:_(s32) = G_PHI %11(s32), %bb.6, %9(s32), %bb.0 + %12:_(s32) = G_PHI %9(s32), %bb.0, %13(s32), %bb.6 bb.2: successors: %bb.3(0x40000000), %bb.4(0x40000000) @@ -695,24 +649,24 @@ body: | bb.4: successors: %bb.5(0x40000000), %bb.6(0x40000000) - %20:_(s1) = G_CONSTANT i1 true G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12 - %22:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + %20:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12 + %21:_(s1) = G_CONSTANT i1 true + %22:sreg_32_xm0_xexec(s32) = SI_IF %20(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.5 bb.5: successors: %bb.6(0x80000000) - %23:_(s1) = G_CONSTANT i1 false - %24:_(s32) = G_CONSTANT i32 1 - %25:_(s32) = G_ADD %12, %24 + %23:_(s32) = G_CONSTANT i32 1 + %24:_(s32) = G_ADD %12, %23 + %25:_(s1) = G_CONSTANT i1 false bb.6: successors: %bb.7(0x04000000), %bb.1(0x7c000000) - %13:_(s32) = G_PHI %25(s32), %bb.5, %9(s32), %bb.4 - %26:_(s1) = G_PHI %23(s1), %bb.5, %20(s1), %bb.4 + %13:_(s32) = G_PHI %24(s32), %bb.5, %8(s32), %bb.4 + %26:_(s1) = G_PHI %25(s1), %bb.5, %21(s1), %bb.4 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %22(s32) %11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %26(s1), %10(s32) SI_LOOP %11(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec @@ -721,20 +675,17 @@ body: | bb.7: successors: %bb.8(0x40000000), %bb.9(0x40000000) - %27:_(s32) = G_PHI %11(s32), %bb.6 - %28:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.6 - %29:_(s32) = G_PHI %12(s32), %bb.6 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) - %30:sreg_32_xm0_xexec(s32) = SI_IF %28(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32) + %27:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.8 bb.8: successors: %bb.9(0x80000000) - G_STORE %29(s32), %7(p1) :: (store (s32), addrspace 1) + G_STORE %12(s32), %7(p1) :: (store (s32), addrspace 1) bb.9: - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) SI_RETURN ... 
@@ -755,78 +706,69 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C]](s1) ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %54(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %43(s1), %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %33(s1), %bb.3 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %41(s1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %31(s1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C1]](s32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %12(s32), %bb.3 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI2]](s1), %bb.1, [[DEF2]](s1), %bb.2 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI1]](s1), %bb.1, [[DEF1]](s1), %bb.2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY12]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]] + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]] - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C4]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI3]](s32), [[COPY]] + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI2]](s32) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, 
implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY16]](s1), [[C6]], [[C5]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FREEZE]](s1), [[C6]], [[C5]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -840,15 +782,15 @@ body: | %4:_(s32) = COPY $vgpr3 %5:_(s32) = COPY $vgpr4 %6:_(p0) = G_MERGE_VALUES %4(s32), %5(s32) - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s1) = G_CONSTANT i1 true + %7:_(s1) = G_CONSTANT i1 true + %8:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %9:_(s32) = G_PHI %10(s32), %bb.3, %7(s32), %bb.0 - %11:_(s32) = G_PHI %7(s32), %bb.0, %12(s32), %bb.3 - %13:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %14(s1), %bb.3 + %9:_(s32) = G_PHI %10(s32), %bb.3, %8(s32), %bb.0 + %11:_(s32) = G_PHI %8(s32), %bb.0, %12(s32), %bb.3 + %13:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %14(s1), %bb.3 %15:sreg_32_xm0_xexec(s32) = SI_IF %13(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 @@ -877,13 +819,11 @@ body: | G_BR %bb.4 bb.4: - %26:_(s1) = G_PHI %14(s1), %bb.3 - %27:_(s32) = G_PHI %10(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) - %28:_(s32) = G_FCONSTANT float 0.000000e+00 - %29:_(s32) = G_FCONSTANT float 1.000000e+00 - %30:_(s32) = G_SELECT %26(s1), %29, %28 - G_STORE %30(s32), %6(p0) :: (store (s32)) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %10(s32) + %26:_(s32) = G_FCONSTANT float 0.000000e+00 + %27:_(s32) = G_FCONSTANT float 1.000000e+00 + %28:_(s32) = G_SELECT %14(s1), %27, %26 + G_STORE %28(s32), %6(p0) :: (store (s32)) S_ENDPGM 0 ... 
@@ -906,41 +846,38 @@ body: | ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), 
[[LOAD]](s32), [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -954,24 +891,24 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]] - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]] + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; GFX10-NEXT: 
[[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} @@ -982,27 +919,21 @@ body: | ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: 
%bb.1(0x80000000) @@ -1017,23 +948,23 @@ body: | %6:_(s32) = COPY $vgpr4 %7:_(s32) = COPY $vgpr5 %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s32) = G_IMPLICIT_DEF + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.3(0x40000000), %bb.5(0x40000000) - %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0 - %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5 - %15:_(s1) = G_CONSTANT i1 true - %16:_(s64) = G_SEXT %13(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %5, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21 - %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(s32) = G_PHI %12(s32), %bb.5, %10(s32), %bb.0 + %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.5 + %15:_(s64) = G_SEXT %13(s32) + %16:_(s32) = G_CONSTANT i32 2 + %17:_(s64) = G_SHL %15, %16(s32) + %18:_(p1) = G_PTR_ADD %5, %17(s64) + %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1) + %20:_(s32) = G_CONSTANT i32 0 + %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20 + %22:_(s1) = G_CONSTANT i1 true + %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.3 bb.2: @@ -1046,17 +977,17 @@ body: | bb.3: successors: %bb.5(0x80000000) - %25:_(s1) = G_CONSTANT i1 false - %26:_(s32) = G_CONSTANT i32 2 - %27:_(s64) = G_SHL %16, %26(s32) - %28:_(p1) = G_PTR_ADD %2, %27(s64) - %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1) - %30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %29, %30 - G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1) - %32:_(s32) = G_ADD %13, %30 - %33:_(s32) = G_CONSTANT i32 100 - %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33 + %25:_(s32) = G_CONSTANT i32 2 + %26:_(s64) = G_SHL %15, %25(s32) + %27:_(p1) = G_PTR_ADD %2, %26(s64) + %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1) + %29:_(s32) = G_CONSTANT i32 1 + %30:_(s32) = G_ADD %28, %29 + G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1) + %31:_(s32) = G_ADD %13, %29 + %32:_(s32) = G_CONSTANT i32 100 + %33:_(s1) = G_ICMP intpred(ult), %13(s32), %32 + %34:_(s1) = G_CONSTANT i1 false G_BR %bb.5 bb.4: @@ -1066,9 +997,9 @@ body: | bb.5: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1 - %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1 - %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1 + %14:_(s32) = G_PHI %31(s32), %bb.3, %9(s32), %bb.1 + %36:sreg_32_xm0_xexec(s1) = G_PHI %34(s1), %bb.3, %22(s1), %bb.1 + %37:_(s1) = G_PHI %33(s1), %bb.3, %22(s1), %bb.1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32) %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32) SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec @@ -1077,9 +1008,7 @@ body: | bb.6: successors: %bb.2(0x40000000), %bb.4(0x40000000) - %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5 - %39:_(s32) = G_PHI %12(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32) - %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32) + %35:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll index 1698f84eea518..9b1cc719d5567 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; Simples case, if - then, that requires lane mask merging, ; %phi lane mask will hold %val_A at %A. Lanes that are active in %B @@ -41,9 +41,10 @@ exit: define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %cond) { ; GFX10-LABEL: divergent_i1_phi_if_else: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_and_b32 s0, 1, s0 +; GFX10-NEXT: s_and_b32 s0, s0, 1 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; GFX10-NEXT: s_cmp_lg_u32 s0, 0 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_xor_b32 s1, exec_lo, s1 ; GFX10-NEXT: ; %bb.1: ; %B @@ -105,46 +106,51 @@ exit: define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a) { ; GFX10-LABEL: loop_with_1break: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: v_mov_b32_e32 v4, s0 +; GFX10-NEXT: ; implicit-def: $sgpr5 ; GFX10-NEXT: s_branch .LBB2_2 ; GFX10-NEXT: .LBB2_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: s_and_b32 s2, exec_lo, s1 -; GFX10-NEXT: s_or_b32 s0, s2, s0 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB2_4 ; GFX10-NEXT: .LBB2_2: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s2, exec_lo, -1 -; GFX10-NEXT: s_or_b32 s1, s1, s2 -; GFX10-NEXT: v_lshlrev_b64 v[5:6], 2, v[4:5] -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v2, v5 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v3, v6, vcc_lo -; GFX10-NEXT: global_load_dword v7, v[7:8], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s5, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v5, s3 +; GFX10-NEXT: v_mov_b32_e32 v4, s2 +; GFX10-NEXT: s_and_b32 s5, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s5, s1, s5 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v2, v4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v3, v5, vcc_lo +; GFX10-NEXT: global_load_dword v4, v[4:5], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v7 -; GFX10-NEXT: s_and_saveexec_b32 s2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 +; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB2_1 ; GFX10-NEXT: ; %bb.3: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v0, v5 -; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 
v1, v6, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v4 -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v4 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: global_load_dword v7, v[5:6], off -; GFX10-NEXT: v_mov_b32_e32 v4, v8 -; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s1, s1, s3 +; GFX10-NEXT: v_mov_b32_e32 v5, s3 +; GFX10-NEXT: v_mov_b32_e32 v4, s2 +; GFX10-NEXT: s_add_i32 s2, s0, 1 +; GFX10-NEXT: s_cmpk_lt_u32 s0, 0x64 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo +; GFX10-NEXT: s_andn2_b32 s3, s5, exec_lo +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_or_b32 s5, s3, s0 +; GFX10-NEXT: global_load_dword v6, v[4:5], off +; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v7, 1, v7 -; GFX10-NEXT: global_store_dword v[5:6], v7, off +; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v6 +; GFX10-NEXT: global_store_dword v[4:5], v6, off ; GFX10-NEXT: s_branch .LBB2_1 ; GFX10-NEXT: .LBB2_4: ; %exit ; GFX10-NEXT: s_endpgm @@ -174,62 +180,69 @@ exit: define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) { ; GFX10-LABEL: loop_with_2breaks: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: v_mov_b32_e32 v6, s0 +; GFX10-NEXT: ; implicit-def: $sgpr5 ; GFX10-NEXT: s_branch .LBB3_3 ; GFX10-NEXT: .LBB3_1: ; %Flow3 ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s3, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s1, s1, s3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; GFX10-NEXT: s_andn2_b32 s2, s5, exec_lo +; GFX10-NEXT: s_and_b32 s3, exec_lo, s6 +; GFX10-NEXT: s_or_b32 s5, s2, s3 ; GFX10-NEXT: .LBB3_2: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: s_and_b32 s2, exec_lo, s1 -; GFX10-NEXT: s_or_b32 s0, s2, s0 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: .LBB3_3: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s2, exec_lo, -1 -; GFX10-NEXT: s_or_b32 s1, s1, s2 -; GFX10-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo -; GFX10-NEXT: global_load_dword v9, v[9:10], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s5, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_and_b32 s5, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s5, s1, s5 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo +; GFX10-NEXT: global_load_dword v6, v[6:7], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: s_and_saveexec_b32 s2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB3_2 ; 
GFX10-NEXT: ; %bb.4: ; %B ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo -; GFX10-NEXT: s_mov_b32 s4, -1 -; GFX10-NEXT: global_load_dword v9, v[9:10], off +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: global_load_dword v6, v[6:7], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: s_and_saveexec_b32 s3, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB3_1 ; GFX10-NEXT: ; %bb.5: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6 -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 -; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo -; GFX10-NEXT: global_load_dword v9, v[7:8], off -; GFX10-NEXT: v_mov_b32_e32 v6, v10 -; GFX10-NEXT: s_and_b32 s5, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_add_i32 s2, s0, 1 +; GFX10-NEXT: s_cmpk_lt_u32 s0, 0x64 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo +; GFX10-NEXT: s_andn2_b32 s3, s6, exec_lo +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_or_b32 s6, s3, s0 +; GFX10-NEXT: global_load_dword v8, v[6:7], off +; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9 -; GFX10-NEXT: global_store_dword v[7:8], v9, off +; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 +; GFX10-NEXT: global_store_dword v[6:7], v8, off ; GFX10-NEXT: s_branch .LBB3_1 ; GFX10-NEXT: .LBB3_6: ; %exit ; GFX10-NEXT: s_endpgm @@ -265,78 +278,87 @@ exit: define amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c) { ; GFX10-LABEL: loop_with_3breaks: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: v_mov_b32_e32 v8, s0 +; GFX10-NEXT: ; implicit-def: $sgpr5 ; GFX10-NEXT: s_branch .LBB4_4 ; GFX10-NEXT: .LBB4_1: ; %Flow5 ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 -; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: s_andn2_b32 s2, s6, exec_lo +; GFX10-NEXT: s_and_b32 s3, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s6, s2, s3 ; GFX10-NEXT: .LBB4_2: ; %Flow4 ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s3, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s1, s1, s3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; GFX10-NEXT: s_andn2_b32 s2, s5, exec_lo +; GFX10-NEXT: s_and_b32 s3, exec_lo, s6 +; GFX10-NEXT: s_or_b32 s5, s2, s3 ; GFX10-NEXT: .LBB4_3: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: s_and_b32 s2, exec_lo, s1 -; GFX10-NEXT: s_or_b32 s0, s2, s0 -; 
GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB4_8 ; GFX10-NEXT: .LBB4_4: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v9, 31, v8 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s2, exec_lo, -1 -; GFX10-NEXT: s_or_b32 s1, s1, s2 -; GFX10-NEXT: v_lshlrev_b64 v[9:10], 2, v[8:9] -; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v2, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v3, v10, vcc_lo -; GFX10-NEXT: global_load_dword v11, v[11:12], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s5, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v9, s3 +; GFX10-NEXT: v_mov_b32_e32 v8, s2 +; GFX10-NEXT: s_and_b32 s5, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s5, s1, s5 +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v2, v8 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v3, v9, vcc_lo +; GFX10-NEXT: global_load_dword v8, v[8:9], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 -; GFX10-NEXT: s_and_saveexec_b32 s2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 +; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_3 ; GFX10-NEXT: ; %bb.5: ; %B ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 -; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v4, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v5, v10, vcc_lo -; GFX10-NEXT: s_mov_b32 s4, -1 -; GFX10-NEXT: global_load_dword v11, v[11:12], off +; GFX10-NEXT: v_mov_b32_e32 v9, s3 +; GFX10-NEXT: v_mov_b32_e32 v8, s2 +; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v4, v8 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v9, vcc_lo +; GFX10-NEXT: global_load_dword v8, v[8:9], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 -; GFX10-NEXT: s_and_saveexec_b32 s3, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 +; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_2 ; GFX10-NEXT: ; %bb.6: ; %C ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 -; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v6, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v7, v10, vcc_lo -; GFX10-NEXT: s_mov_b32 s5, -1 -; GFX10-NEXT: global_load_dword v11, v[11:12], off +; GFX10-NEXT: v_mov_b32_e32 v9, s3 +; GFX10-NEXT: v_mov_b32_e32 v8, s2 +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v6, v8 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v7, v9, vcc_lo +; GFX10-NEXT: global_load_dword v8, v[8:9], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 -; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 +; GFX10-NEXT: s_and_saveexec_b32 s9, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_1 ; GFX10-NEXT: ; %bb.7: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v0, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v1, v10, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v12, 1, v8 -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v8 -; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo -; GFX10-NEXT: global_load_dword v11, v[9:10], off -; GFX10-NEXT: v_mov_b32_e32 v8, v12 -; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s5, s5, s6 +; GFX10-NEXT: v_mov_b32_e32 v9, s3 +; GFX10-NEXT: v_mov_b32_e32 v8, s2 +; 
GFX10-NEXT: s_add_i32 s2, s0, 1 +; GFX10-NEXT: s_cmpk_lt_u32 s0, 0x64 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v0, v8 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo +; GFX10-NEXT: s_andn2_b32 s3, s8, exec_lo +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_or_b32 s8, s3, s0 +; GFX10-NEXT: global_load_dword v10, v[8:9], off +; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v11, 1, v11 -; GFX10-NEXT: global_store_dword v[9:10], v11, off +; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v10 +; GFX10-NEXT: global_store_dword v[8:9], v10, off ; GFX10-NEXT: s_branch .LBB4_1 ; GFX10-NEXT: .LBB4_8: ; %exit ; GFX10-NEXT: s_endpgm @@ -382,60 +404,63 @@ exit: define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) { ; GFX10-LABEL: loop_with_div_break_with_body: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: ; implicit-def: $sgpr2 -; GFX10-NEXT: ; implicit-def: $sgpr3 -; GFX10-NEXT: v_mov_b32_e32 v6, s0 +; GFX10-NEXT: ; implicit-def: $sgpr5 +; GFX10-NEXT: ; implicit-def: $sgpr6 ; GFX10-NEXT: s_branch .LBB5_2 ; GFX10-NEXT: .LBB5_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: s_and_b32 s4, exec_lo, s2 -; GFX10-NEXT: s_or_b32 s0, s4, s0 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s3 -; GFX10-NEXT: s_or_b32 s1, s1, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB5_4 ; GFX10-NEXT: .LBB5_2: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6 -; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, -1 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo -; GFX10-NEXT: global_load_dword v9, v[9:10], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_mov_b32 s7, exec_lo +; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s6, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_and_b32 s6, exec_lo, s7 +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: s_and_b32 s7, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s6, s1, s6 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: global_load_dword v6, v[6:7], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB5_1 ; GFX10-NEXT: ; %bb.3: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6 -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 -; GFX10-NEXT: 
s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: global_load_dword v9, v[7:8], off -; GFX10-NEXT: s_and_b32 s5, exec_lo, 0 -; GFX10-NEXT: v_mov_b32_e32 v6, v10 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s3, s3, s5 -; GFX10-NEXT: s_or_b32 s2, s2, s6 +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_add_i32 s2, s0, 1 +; GFX10-NEXT: s_cmpk_lt_u32 s0, 0x64 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo +; GFX10-NEXT: s_andn2_b32 s3, s6, exec_lo +; GFX10-NEXT: s_and_b32 s6, exec_lo, 0 +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: global_load_dword v8, v[6:7], off +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_or_b32 s6, s3, s6 +; GFX10-NEXT: s_or_b32 s5, s5, s0 +; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9 -; GFX10-NEXT: global_store_dword v[7:8], v9, off +; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 +; GFX10-NEXT: global_store_dword v[6:7], v8, off ; GFX10-NEXT: s_branch .LBB5_1 ; GFX10-NEXT: .LBB5_4: ; %loop.exit.guard -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-NEXT: s_and_saveexec_b32 s0, s1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_and_saveexec_b32 s0, s6 ; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX10-NEXT: s_cbranch_execz .LBB5_6 ; GFX10-NEXT: ; %bb.5: ; %break.body @@ -472,8 +497,7 @@ exit: } ; Snippet from test generated by the GraphicsFuzz tool, frontend generates ir -; with irreducible control flow graph. FixIrreducible converts it into natural -; loop and in the process creates i1 phi with three incoming values. +; with irreducible control flow graph. ; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) { ; do { @@ -488,27 +512,103 @@ exit: ; return a0; ; } -; This test is also interesting because it has phi with three incomings -;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { -;.entry: -; %.y_lt_a2 = icmp sgt i32 %a2, %y -; %.x_lt_a2 = icmp sgt i32 %a2, %x -; %.x_lt_a3 = icmp sgt i32 %a3, %x -; br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)' -; -;.preheader: ; if (y < a2), -; br label %.inner_loop -; -;.inner_loop: ; do while x < a2 -; br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit -; -;.loopexit: ; if x < a3 -; %not.inner_loop = xor i1 %.y_lt_a2, true -; %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)' -; %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select retrun value a1 'x < a3' or a0 'loop ends' -; br i1 %brmerge, label %.exit, label %.preheader -; -;.exit: -; ret i32 %.ret -;} +; This test is also interesting because it had phi with three incomings +; After fa4cc9ddd58eb9fef2497e678873ff3b495340a3, FixIrreducible does not +; generate phi with three incomings. There is a mir test with such phi. 
+define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { +; GFX10-LABEL: irreducible_cfg: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, v4, v1 +; GFX10-NEXT: s_mov_b32 s0, exec_lo +; GFX10-NEXT: s_mov_b32 s2, 0 +; GFX10-NEXT: s_and_b32 s3, s0, 1 +; GFX10-NEXT: s_xor_b32 s1, vcc_lo, s0 +; GFX10-NEXT: s_cmp_lg_u32 s3, 0 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: s_branch .LBB6_2 +; GFX10-NEXT: .LBB6_1: ; %Flow2 +; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_and_b32 s3, exec_lo, s4 +; GFX10-NEXT: s_or_b32 s2, s3, s2 +; GFX10-NEXT: s_and_b32 s1, s1, 1 +; GFX10-NEXT: s_cmp_lg_u32 s1, 0 +; GFX10-NEXT: s_cselect_b32 s1, exec_lo, 0 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 +; GFX10-NEXT: s_cbranch_execz .LBB6_8 +; GFX10-NEXT: .LBB6_2: ; %irr.guard +; GFX10-NEXT: ; =>This Loop Header: Depth=1 +; GFX10-NEXT: ; Child Loop BB6_6 Depth 2 +; GFX10-NEXT: s_andn2_b32 s4, s0, exec_lo +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_mov_b32 s3, exec_lo +; GFX10-NEXT: s_or_b32 s0, s4, s0 +; GFX10-NEXT: s_and_saveexec_b32 s4, s1 +; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4 +; GFX10-NEXT: ; %bb.3: ; %.loopexit +; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 +; GFX10-NEXT: v_cmp_gt_i32_e64 s1, v5, v0 +; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_xor_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_andn2_b32 s0, s0, exec_lo +; GFX10-NEXT: s_or_b32 s5, s1, s5 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s1 +; GFX10-NEXT: s_xor_b32 s5, s5, s6 +; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo +; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: s_or_b32 s3, s3, s5 +; GFX10-NEXT: ; %bb.4: ; %Flow1 +; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_mov_b32 s4, exec_lo +; GFX10-NEXT: ; implicit-def: $sgpr1 +; GFX10-NEXT: s_and_saveexec_b32 s5, s3 +; GFX10-NEXT: s_cbranch_execz .LBB6_1 +; GFX10-NEXT: ; %bb.5: ; %.preheader +; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 +; GFX10-NEXT: v_cmp_le_i32_e64 s1, v4, v0 +; GFX10-NEXT: s_mov_b32 s3, 0 +; GFX10-NEXT: .LBB6_6: ; %.inner_loop +; GFX10-NEXT: ; Parent Loop BB6_2 Depth=1 +; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX10-NEXT: s_and_b32 s6, exec_lo, s1 +; GFX10-NEXT: s_or_b32 s3, s6, s3 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 +; GFX10-NEXT: s_cbranch_execnz .LBB6_6 +; GFX10-NEXT: ; %bb.7: ; %Flow +; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3 +; GFX10-NEXT: s_andn2_b32 s3, s4, exec_lo +; GFX10-NEXT: s_and_b32 s4, exec_lo, 0 +; GFX10-NEXT: s_mov_b32 s1, 1 +; GFX10-NEXT: s_or_b32 s4, s3, s4 +; GFX10-NEXT: s_branch .LBB6_1 +; GFX10-NEXT: .LBB6_8: ; %.exit +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v3, s0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: ; return to shader part epilog +.entry: + %.y_lt_a2 = icmp sgt i32 %a2, %y + %.x_lt_a2 = icmp sgt i32 %a2, %x + %.x_lt_a3 = icmp sgt i32 %a3, %x + br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)' + +.preheader: ; if (y < a2), + br label %.inner_loop + +.inner_loop: ; do while x < a2 + br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit + +.loopexit: ; if x < a3 + %not.inner_loop = xor i1 %.y_lt_a2, true + %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit 
loop if 'x < a3' or 'loop ends since !(y < a2)' + %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select retrun value a1 'x < a3' or a0 'loop ends' + br i1 %brmerge, label %.exit, label %.preheader + +.exit: + ret i32 %.ret +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir index 39ebf66411cc6..bd7140de5d617 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir @@ -39,10 +39,10 @@ body: | ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -69,9 +69,9 @@ body: | bb.2: %12:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32) - %13:_(s32) = G_CONSTANT i32 2 - %14:_(s32) = G_CONSTANT i32 1 - %15:_(s32) = G_SELECT %12(s1), %14, %13 + %13:_(s32) = G_SEXT %12(s1) + %14:_(s32) = G_CONSTANT i32 2 + %15:_(s32) = G_ADD %13, %14 G_STORE %15(s32), %2(p1) :: (store (s32), addrspace 1) S_ENDPGM 0 ... @@ -135,10 +135,10 @@ body: | ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY7]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C3]], [[C4]] - ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY11]](s1) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -179,9 +179,9 @@ body: | bb.4: %15:_(s1) = G_PHI %9(s1), %bb.1, %13(s1), %bb.2 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32) - %16:_(s32) = G_CONSTANT i32 1 + %16:_(s32) = G_SEXT %15(s1) %17:_(s32) = G_CONSTANT i32 2 - %18:_(s32) = G_SELECT %15(s1), %16, %17 + %18:_(s32) = G_ADD %16, %17 G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1) S_ENDPGM 0 ... 
@@ -202,26 +202,26 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %35(s1), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %34(s1), %bb.3 ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc @@ -259,8 +259,7 @@ body: | ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) @@ -272,30 +271,30 @@ body: | %3:_(s32) = COPY $vgpr2 %4:_(s32) = COPY $vgpr3 %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) - %6:_(s32) = G_CONSTANT i32 0 - %7:_(s32) = G_IMPLICIT_DEF + %6:_(s32) = G_IMPLICIT_DEF + %7:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %8:_(s32) = G_PHI %9(s32), %bb.3, %6(s32), %bb.0 - %10:_(s32) = G_PHI %6(s32), %bb.0, %11(s32), %bb.3 - %12:_(s1) = G_CONSTANT i1 true - %13:_(s64) = G_SEXT %10(s32) - %14:_(s32) = G_CONSTANT i32 2 - %15:_(s64) = G_SHL %13, %14(s32) - %16:_(p1) = G_PTR_ADD %5, %15(s64) - %17:_(s32) = G_LOAD %16(p1) :: (load (s32), addrspace 1) - %18:_(s32) = G_CONSTANT i32 0 - %19:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %17(s32), %18 - %20:sreg_32_xm0_xexec(s32) = SI_IF %19(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit 
$exec + %8:_(s32) = G_PHI %9(s32), %bb.3, %7(s32), %bb.0 + %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.3 + %12:_(s64) = G_SEXT %10(s32) + %13:_(s32) = G_CONSTANT i32 2 + %14:_(s64) = G_SHL %12, %13(s32) + %15:_(p1) = G_PTR_ADD %5, %14(s64) + %16:_(s32) = G_LOAD %15(p1) :: (load (s32), addrspace 1) + %17:_(s32) = G_CONSTANT i32 0 + %18:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %16(s32), %17 + %19:_(s1) = G_CONSTANT i1 true + %20:sreg_32_xm0_xexec(s32) = SI_IF %18(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.3(0x80000000) %21:_(s32) = G_CONSTANT i32 2 - %22:_(s64) = G_SHL %13, %21(s32) + %22:_(s64) = G_SHL %12, %21(s32) %23:_(p1) = G_PTR_ADD %2, %22(s64) %24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1) %25:_(s32) = G_CONSTANT i32 1 @@ -308,16 +307,15 @@ body: | bb.3: successors: %bb.4(0x04000000), %bb.1(0x7c000000) - %11:_(s32) = G_PHI %27(s32), %bb.2, %7(s32), %bb.1 - %30:_(s1) = G_PHI %29(s1), %bb.2, %12(s1), %bb.1 + %11:_(s32) = G_PHI %27(s32), %bb.2, %6(s32), %bb.1 + %30:_(s1) = G_PHI %29(s1), %bb.2, %19(s1), %bb.1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s32) %9:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %30(s1), %8(s32) SI_LOOP %9(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.4: - %31:_(s32) = G_PHI %9(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32) S_ENDPGM 0 ... @@ -340,26 +338,26 @@ body: | ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %48(s1), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %47(s1), %bb.3 ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; 
GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc @@ -370,14 +368,14 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 @@ -385,7 +383,7 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %46(s1), %bb.5 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) @@ -425,8 +423,7 @@ body: | ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) @@ -441,43 +438,43 @@ body: | %6:_(s32) = COPY $vgpr4 %7:_(s32) = COPY $vgpr5 %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s32) = G_IMPLICIT_DEF + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %11:_(s32) = G_PHI %12(s32), %bb.3, %9(s32), %bb.0 - %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.3 - %15:_(s1) = G_CONSTANT i1 true - %16:_(s64) = G_SEXT %13(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %5, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21 - %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(s32) = G_PHI %12(s32), %bb.3, %10(s32), %bb.0 + %13:_(s32) = G_PHI %10(s32), 
%bb.0, %14(s32), %bb.3 + %15:_(s64) = G_SEXT %13(s32) + %16:_(s32) = G_CONSTANT i32 2 + %17:_(s64) = G_SHL %15, %16(s32) + %18:_(p1) = G_PTR_ADD %5, %17(s64) + %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1) + %20:_(s32) = G_CONSTANT i32 0 + %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20 + %22:_(s1) = G_CONSTANT i1 true + %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.4(0x40000000), %bb.5(0x40000000) - %24:_(s1) = G_CONSTANT i1 true - %25:_(s32) = G_CONSTANT i32 2 - %26:_(s64) = G_SHL %16, %25(s32) - %27:_(p1) = G_PTR_ADD %8, %26(s64) - %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1) - %29:_(s32) = G_CONSTANT i32 0 - %30:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %28(s32), %29 - %31:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %24:_(s32) = G_CONSTANT i32 2 + %25:_(s64) = G_SHL %15, %24(s32) + %26:_(p1) = G_PTR_ADD %8, %25(s64) + %27:_(s32) = G_LOAD %26(p1) :: (load (s32), addrspace 1) + %28:_(s32) = G_CONSTANT i32 0 + %29:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %27(s32), %28 + %30:_(s1) = G_CONSTANT i1 true + %31:sreg_32_xm0_xexec(s32) = SI_IF %29(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %14:_(s32) = G_PHI %32(s32), %bb.5, %10(s32), %bb.1 - %33:_(s1) = G_PHI %34(s1), %bb.5, %15(s1), %bb.1 + %14:_(s32) = G_PHI %32(s32), %bb.5, %9(s32), %bb.1 + %33:_(s1) = G_PHI %34(s1), %bb.5, %22(s1), %bb.1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32) %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %33(s1), %11(s32) SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec @@ -487,7 +484,7 @@ body: | successors: %bb.5(0x80000000) %35:_(s32) = G_CONSTANT i32 2 - %36:_(s64) = G_SHL %16, %35(s32) + %36:_(s64) = G_SHL %15, %35(s32) %37:_(p1) = G_PTR_ADD %2, %36(s64) %38:_(s32) = G_LOAD %37(p1) :: (load (s32), addrspace 1) %39:_(s32) = G_CONSTANT i32 1 @@ -500,14 +497,13 @@ body: | bb.5: successors: %bb.3(0x80000000) - %32:_(s32) = G_PHI %41(s32), %bb.4, %10(s32), %bb.2 - %34:_(s1) = G_PHI %43(s1), %bb.4, %24(s1), %bb.2 + %32:_(s32) = G_PHI %41(s32), %bb.4, %9(s32), %bb.2 + %34:_(s1) = G_PHI %43(s1), %bb.4, %30(s1), %bb.2 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32) G_BR %bb.3 bb.6: - %44:_(s32) = G_PHI %12(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %44(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32) S_ENDPGM 0 ... 
@@ -533,26 +529,26 @@ body: | ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %61(s1), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %60(s1), %bb.3 ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc @@ -563,14 +559,14 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1) ; GFX10-NEXT: 
[[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 @@ -578,7 +574,7 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %59(s1), %bb.5 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1 ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) @@ -589,14 +585,14 @@ body: | ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32) ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64) ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C9]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C8]] + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C9]](s1) ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP2]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 @@ -604,7 +600,7 @@ body: | ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %72(s1), %bb.7 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %71(s1), %bb.7 ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2 ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1) @@ -646,8 +642,7 @@ body: | ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) @@ -665,43 +660,43 @@ body: | %9:_(s32) = COPY $vgpr6 %10:_(s32) = COPY $vgpr7 %11:_(p1) = G_MERGE_VALUES %9(s32), %10(s32) - %12:_(s32) = G_CONSTANT i32 0 - %13:_(s32) = G_IMPLICIT_DEF + %12:_(s32) = G_IMPLICIT_DEF + %13:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.2(0x40000000), %bb.3(0x40000000) - %14:_(s32) = G_PHI %15(s32), %bb.3, %12(s32), %bb.0 - %16:_(s32) = G_PHI %12(s32), %bb.0, %17(s32), %bb.3 - %18:_(s1) = G_CONSTANT i1 true - %19:_(s64) = G_SEXT %16(s32) - %20:_(s32) = G_CONSTANT i32 2 - %21:_(s64) = G_SHL %19, %20(s32) - %22:_(p1) = G_PTR_ADD %5, %21(s64) - %23:_(s32) = 
G_LOAD %22(p1) :: (load (s32), addrspace 1) - %24:_(s32) = G_CONSTANT i32 0 - %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 - %26:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + %14:_(s32) = G_PHI %15(s32), %bb.3, %13(s32), %bb.0 + %16:_(s32) = G_PHI %13(s32), %bb.0, %17(s32), %bb.3 + %18:_(s64) = G_SEXT %16(s32) + %19:_(s32) = G_CONSTANT i32 2 + %20:_(s64) = G_SHL %18, %19(s32) + %21:_(p1) = G_PTR_ADD %5, %20(s64) + %22:_(s32) = G_LOAD %21(p1) :: (load (s32), addrspace 1) + %23:_(s32) = G_CONSTANT i32 0 + %24:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %22(s32), %23 + %25:_(s1) = G_CONSTANT i1 true + %26:sreg_32_xm0_xexec(s32) = SI_IF %24(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 bb.2: successors: %bb.4(0x40000000), %bb.5(0x40000000) - %27:_(s1) = G_CONSTANT i1 true - %28:_(s32) = G_CONSTANT i32 2 - %29:_(s64) = G_SHL %19, %28(s32) - %30:_(p1) = G_PTR_ADD %8, %29(s64) - %31:_(s32) = G_LOAD %30(p1) :: (load (s32), addrspace 1) - %32:_(s32) = G_CONSTANT i32 0 - %33:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %31(s32), %32 - %34:sreg_32_xm0_xexec(s32) = SI_IF %33(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %27:_(s32) = G_CONSTANT i32 2 + %28:_(s64) = G_SHL %18, %27(s32) + %29:_(p1) = G_PTR_ADD %8, %28(s64) + %30:_(s32) = G_LOAD %29(p1) :: (load (s32), addrspace 1) + %31:_(s32) = G_CONSTANT i32 0 + %32:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %30(s32), %31 + %33:_(s1) = G_CONSTANT i1 true + %34:sreg_32_xm0_xexec(s32) = SI_IF %32(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.4 bb.3: successors: %bb.8(0x04000000), %bb.1(0x7c000000) - %17:_(s32) = G_PHI %35(s32), %bb.5, %13(s32), %bb.1 - %36:_(s1) = G_PHI %37(s1), %bb.5, %18(s1), %bb.1 + %17:_(s32) = G_PHI %35(s32), %bb.5, %12(s32), %bb.1 + %36:_(s1) = G_PHI %37(s1), %bb.5, %25(s1), %bb.1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32) %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %14(s32) SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec @@ -710,21 +705,21 @@ body: | bb.4: successors: %bb.6(0x40000000), %bb.7(0x40000000) - %38:_(s1) = G_CONSTANT i1 true - %39:_(s32) = G_CONSTANT i32 2 - %40:_(s64) = G_SHL %19, %39(s32) - %41:_(p1) = G_PTR_ADD %11, %40(s64) - %42:_(s32) = G_LOAD %41(p1) :: (load (s32), addrspace 1) - %43:_(s32) = G_CONSTANT i32 0 - %44:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %42(s32), %43 - %45:sreg_32_xm0_xexec(s32) = SI_IF %44(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + %38:_(s32) = G_CONSTANT i32 2 + %39:_(s64) = G_SHL %18, %38(s32) + %40:_(p1) = G_PTR_ADD %11, %39(s64) + %41:_(s32) = G_LOAD %40(p1) :: (load (s32), addrspace 1) + %42:_(s32) = G_CONSTANT i32 0 + %43:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %41(s32), %42 + %44:_(s1) = G_CONSTANT i1 true + %45:sreg_32_xm0_xexec(s32) = SI_IF %43(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 bb.5: successors: %bb.3(0x80000000) - %35:_(s32) = G_PHI %46(s32), %bb.7, %13(s32), %bb.2 - %37:_(s1) = G_PHI %47(s1), %bb.7, %27(s1), %bb.2 + %35:_(s32) = G_PHI %46(s32), %bb.7, %12(s32), %bb.2 + %37:_(s1) = G_PHI %47(s1), %bb.7, %33(s1), %bb.2 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32) G_BR %bb.3 @@ -732,7 +727,7 @@ body: | successors: %bb.7(0x80000000) %48:_(s32) = G_CONSTANT i32 2 - %49:_(s64) = G_SHL %19, %48(s32) + %49:_(s64) = G_SHL %18, %48(s32) %50:_(p1) 
= G_PTR_ADD %2, %49(s64) %51:_(s32) = G_LOAD %50(p1) :: (load (s32), addrspace 1) %52:_(s32) = G_CONSTANT i32 1 @@ -745,14 +740,13 @@ body: | bb.7: successors: %bb.5(0x80000000) - %46:_(s32) = G_PHI %54(s32), %bb.6, %13(s32), %bb.4 - %47:_(s1) = G_PHI %56(s1), %bb.6, %38(s1), %bb.4 + %46:_(s32) = G_PHI %54(s32), %bb.6, %12(s32), %bb.4 + %47:_(s1) = G_PHI %56(s1), %bb.6, %44(s1), %bb.4 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %45(s32) G_BR %bb.5 bb.8: - %57:_(s32) = G_PHI %15(s32), %bb.3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %57(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) S_ENDPGM 0 ... @@ -775,41 +769,38 @@ body: | ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, 
implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -823,24 +814,24 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) - ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]] - ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 
[[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]] + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} @@ -851,27 +842,21 @@ body: | ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32) ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x80000000) @@ -886,23 +871,23 @@ body: | %6:_(s32) = COPY $vgpr4 %7:_(s32) = COPY $vgpr5 %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s32) = G_IMPLICIT_DEF + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 bb.1: successors: %bb.3(0x40000000), %bb.5(0x40000000) - %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0 - %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5 - %15:_(s1) = G_CONSTANT i1 true - %16:_(s64) = G_SEXT %13(s32) - %17:_(s32) = G_CONSTANT i32 2 - %18:_(s64) = G_SHL %16, %17(s32) - %19:_(p1) = G_PTR_ADD %5, %18(s64) - %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1) - %21:_(s32) = G_CONSTANT i32 0 - %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21 - %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + %11:_(s32) = G_PHI %12(s32), %bb.5, %10(s32), %bb.0 + %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.5 + %15:_(s64) = G_SEXT %13(s32) + %16:_(s32) = G_CONSTANT i32 2 + %17:_(s64) = G_SHL %15, %16(s32) + %18:_(p1) = G_PTR_ADD %5, %17(s64) + %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1) + %20:_(s32) = G_CONSTANT i32 0 + %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20 + %22:_(s1) = G_CONSTANT i1 true + %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.3 bb.2: @@ -915,17 +900,17 @@ body: | bb.3: successors: %bb.5(0x80000000) - %25:_(s1) = G_CONSTANT i1 false - %26:_(s32) = G_CONSTANT i32 2 - %27:_(s64) = G_SHL %16, %26(s32) - %28:_(p1) = G_PTR_ADD %2, %27(s64) - %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1) - %30:_(s32) = G_CONSTANT i32 1 - %31:_(s32) = G_ADD %29, %30 - G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1) - %32:_(s32) = G_ADD %13, %30 - %33:_(s32) = G_CONSTANT i32 100 - %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33 + %25:_(s32) = G_CONSTANT i32 2 + %26:_(s64) = G_SHL %15, %25(s32) + %27:_(p1) = G_PTR_ADD %2, %26(s64) + %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1) + %29:_(s32) = G_CONSTANT i32 1 + %30:_(s32) = G_ADD %28, %29 + G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1) + %31:_(s32) = G_ADD %13, %29 + %32:_(s32) = G_CONSTANT i32 100 + %33:_(s1) = G_ICMP intpred(ult), %13(s32), %32 + %34:_(s1) = G_CONSTANT i1 false G_BR %bb.5 bb.4: @@ -935,9 +920,9 @@ body: | bb.5: successors: %bb.6(0x04000000), %bb.1(0x7c000000) - %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1 - %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1 - %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1 + %14:_(s32) = G_PHI %31(s32), %bb.3, %9(s32), %bb.1 + %36:sreg_32_xm0_xexec(s1) = G_PHI %34(s1), %bb.3, %22(s1), %bb.1 + %37:_(s1) = G_PHI %33(s1), %bb.3, %22(s1), %bb.1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32) %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), 
%11(s32) SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec @@ -946,10 +931,8 @@ body: | bb.6: successors: %bb.2(0x40000000), %bb.4(0x40000000) - %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5 - %39:_(s32) = G_PHI %12(s32), %bb.5 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32) - %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32) + %35:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.2 ... @@ -960,6 +943,223 @@ tracksRegLiveness: true body: | ; GFX10-LABEL: name: irreducible_cfg ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.8(0x80000000) + ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) + ; GFX10-NEXT: G_BR %bb.8 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %51(s1), %bb.5, %50(s1), %bb.8 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %39(s1), %bb.5, %38(s1), %bb.8 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[COPY10]](s1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_BR %bb.2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.4(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: G_BR %bb.4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.8(0x7c000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY10]](s1), %bb.1, %58(s1), %bb.7 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %23(s1), %bb.7, [[DEF]](s1), %bb.1 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), %27(s32) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) 
= COPY [[COPY9]](s1) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_BR %bb.6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.4: + ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.4(0x7c000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.2, %29(s32), %bb.4 + ; GFX10-NEXT: [[INT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI4]](s32) + ; GFX10-NEXT: SI_LOOP [[INT1]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_BR %bb.7 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.5: + ; GFX10-NEXT: successors: %bb.1(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]] + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C4]] + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR1]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[OR]], [[C4]] + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %52(s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: G_BR %bb.1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.6: + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[COPY3]], [[COPY2]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.7: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: G_BR %bb.3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.8: + ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[COPY14]](s1), %bb.3 + ; GFX10-NEXT: 
[[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, [[COPY13]](s1), %bb.3 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.3 + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3, [[C1]](s32), %bb.0 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[COPY19]](s1) + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY21]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) + ; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY23]](s1) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: G_BR %bb.5 + bb.0: + successors: %bb.8(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s1) = G_IMPLICIT_DEF + %7:_(s1) = G_ICMP intpred(sgt), %4(s32), %1 + %8:_(s1) = G_CONSTANT i1 true + %9:_(s1) = G_XOR %7, %8 + %10:_(s32) = G_CONSTANT i32 0 + G_BR %bb.8 + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %11:_(s1) = G_PHI %12(s1), %bb.5, %13(s1), %bb.8 + %14:sreg_32_xm0_xexec(s1) = G_PHI %15(s1), %bb.5, %16(s1), %bb.8 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32) + %18:_(s1) = G_CONSTANT i1 true + %19:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x80000000) + + %20:_(s1) = G_ICMP intpred(sle), %4(s32), %0 + %21:_(s32) = G_CONSTANT i32 0 + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.8(0x7c000000) + + %22:_(s1) = G_PHI %23(s1), %bb.7, %6(s1), %bb.1 + %24:_(s1) = G_PHI %25(s1), %bb.7, %18(s1), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32) + %26:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %27(s32) + SI_LOOP %26(s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.7(0x04000000), %bb.4(0x7c000000) + + %28:_(s32) = G_PHI %21(s32), %bb.2, %29(s32), %bb.4 + %29:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %20(s1), %28(s32) + SI_LOOP %29(s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.7 + + bb.5: + successors: %bb.1(0x80000000) + + %12:_(s1) = G_ICMP intpred(sgt), %5(s32), %0 + %30:_(s1) = G_FREEZE %7 + %31:_(s1) = G_CONSTANT i1 true + %32:_(s1) = G_XOR %30, %31 + %33:_(s1) = G_OR %12, %32 + %15:_(s1) = G_XOR %33, %31 + G_BR %bb.1 + + bb.6: + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32) + %34:_(s32) = G_SELECT %11(s1), %3, %2 + %35:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %34(s32) + $sgpr0 = COPY %35(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + + 
bb.7: + successors: %bb.3(0x80000000) + + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %29(s32) + %25:_(s1) = G_CONSTANT i1 false + %23:_(s1) = G_CONSTANT i1 true + G_BR %bb.3 + + bb.8: + successors: %bb.5(0x40000000), %bb.1(0x40000000) + + %27:_(s32) = G_PHI %26(s32), %bb.3, %10(s32), %bb.0 + %13:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.3 + %36:sreg_32_xm0_xexec(s1) = G_PHI %9(s1), %bb.0, %22(s1), %bb.3 + %16:_(s1) = G_CONSTANT i1 true + %17:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.5 +... + +# Keep original test that had phi with three incomings. +# FixIrreducible no longer generates such phi. +--- +name: irreducible_cfg_phi_with_three_incomings +legalized: true +tracksRegLiveness: true +body: | + ; GFX10-LABEL: name: irreducible_cfg_phi_with_three_incomings + ; GFX10: bb.0: ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -969,26 +1169,25 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %53(s1), %bb.6, %57(s1), %bb.7 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %45(s1), %bb.6, %49(s1), %bb.7 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %34(s1), %bb.6, %33(s1), %bb.7 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) @@ -1015,27 +1214,23 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32) ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]] ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1) - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C2]] ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]] - ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), 
%25(s32) - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %49(s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %26(s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[COPY3]], [[COPY2]] + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT2]](s32) + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[COPY3]], [[COPY2]] ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32) ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 @@ -1043,34 +1238,31 @@ body: | ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT1]](s32), %bb.3 - ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; GFX10-NEXT: 
[[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF3]](s1), %bb.4 - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI9]](s32), %bb.2, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY17]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF2]](s1), %bb.4 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI6]](s32), %bb.2, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI4]](s1) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY13]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 bb.0: successors: %bb.7(0x80000000) @@ -1082,22 +1274,22 @@ body: | %3:_(s32) = COPY $vgpr3 %4:_(s32) = COPY $vgpr4 %5:_(s32) = COPY $vgpr5 - %6:_(s32) = G_CONSTANT i32 0 - %7:_(s1) = G_IMPLICIT_DEF - %8:_(s1) = G_ICMP intpred(sgt), %4(s32), %1 + %6:_(s1) = G_IMPLICIT_DEF + %7:_(s1) = G_ICMP intpred(sgt), %4(s32), %1 + %8:_(s32) = G_CONSTANT i32 0 G_BR %bb.7 bb.1: successors: %bb.3(0x80000000) - %9:_(s32) = G_CONSTANT i32 0 - 
%10:_(s1) = G_ICMP intpred(sle), %4(s32), %0 + %9:_(s1) = G_ICMP intpred(sle), %4(s32), %0 + %10:_(s32) = G_CONSTANT i32 0 G_BR %bb.3 bb.2: successors: %bb.4(0x40000000), %bb.7(0x40000000) - %11:_(s1) = G_PHI %12(s1), %bb.6, %7(s1), %bb.7 + %11:_(s1) = G_PHI %12(s1), %bb.6, %6(s1), %bb.7 %13:_(s1) = G_PHI %12(s1), %bb.6, %14(s1), %bb.7 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) %16:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(s1), %17(s32) @@ -1107,8 +1299,8 @@ body: | bb.3: successors: %bb.6(0x04000000), %bb.3(0x7c000000) - %18:_(s32) = G_PHI %9(s32), %bb.1, %19(s32), %bb.3 - %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %18(s32) + %18:_(s32) = G_PHI %10(s32), %bb.1, %19(s32), %bb.3 + %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %9(s1), %18(s32) SI_LOOP %19(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.6 @@ -1117,18 +1309,18 @@ body: | G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32) %20:_(s1) = G_ICMP intpred(sgt), %5(s32), %0 - %21:_(s1) = G_CONSTANT i1 true - %22:_(s1) = G_XOR %8, %21 - %23:_(s1) = G_OR %20, %22 - %24:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %23(s1), %25(s32) - SI_LOOP %24(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec + %21:_(s1) = G_FREEZE %7 + %22:_(s1) = G_CONSTANT i1 true + %23:_(s1) = G_XOR %21, %22 + %24:_(s1) = G_OR %20, %23 + %25:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %26(s32) + %27:_(s32) = G_CONSTANT i32 0 + SI_LOOP %25(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.5 bb.5: - %26:_(s1) = G_PHI %20(s1), %bb.4 - %27:_(s32) = G_PHI %24(s32), %bb.4 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32) - %28:_(s32) = G_SELECT %26(s1), %3, %2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %25(s32) + %28:_(s32) = G_SELECT %20(s1), %3, %2 %29:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %28(s32) $sgpr0 = COPY %29(s32) SI_RETURN_TO_EPILOG implicit $sgpr0 @@ -1136,18 +1328,17 @@ body: | bb.6: successors: %bb.2(0x80000000) - %30:_(s32) = G_PHI %19(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32) %12:_(s1) = G_CONSTANT i1 false - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32) G_BR %bb.2 bb.7: successors: %bb.1(0x40000000), %bb.2(0x40000000) - %25:_(s32) = G_PHI %24(s32), %bb.4, %25(s32), %bb.2, %6(s32), %bb.0 - %17:_(s32) = G_PHI %6(s32), %bb.4, %16(s32), %bb.2, %6(s32), %bb.0 - %31:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %11(s1), %bb.2, %21(s1), %bb.4 + %26:_(s32) = G_PHI %25(s32), %bb.4, %26(s32), %bb.2, %8(s32), %bb.0 + %17:_(s32) = G_PHI %27(s32), %bb.4, %16(s32), %bb.2, %8(s32), %bb.0 + %30:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %11(s1), %bb.2, %22(s1), %bb.4 %14:_(s1) = G_CONSTANT i1 true - %15:sreg_32_xm0_xexec(s32) = SI_IF %31(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + %15:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec G_BR %bb.1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll index 1855ede0483de..6e72c11f892a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll @@ -1,31 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s define void @temporal_divergent_i1_phi(float %val, ptr %addr) { ; GFX10-LABEL: temporal_divergent_i1_phi: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s6, 1 ; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 -; GFX10-NEXT: ; implicit-def: $sgpr6 ; GFX10-NEXT: .LBB0_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v5, v4 -; GFX10-NEXT: v_and_b32_e32 v6, 1, v3 -; GFX10-NEXT: v_xor_b32_e32 v3, 1, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v6 -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s6, s6, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s5 +; GFX10-NEXT: s_mov_b32 s7, s6 +; GFX10-NEXT: s_add_i32 s5, s5, 1 +; GFX10-NEXT: s_xor_b32 s6, s6, 1 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0 +; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB0_1 ; GFX10-NEXT: ; %bb.2: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_cmp_lg_u32 s7, 0 +; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4 ; GFX10-NEXT: flat_store_dword v[1:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -51,27 +48,23 @@ define void @temporal_divergent_i1_non_phi(float %val, ptr %addr) { ; GFX10-LABEL: temporal_divergent_i1_non_phi: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 -; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s5, 1 +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: .LBB1_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_xor_b32_e32 v3, 1, v3 -; GFX10-NEXT: v_cvt_f32_u32_e32 v5, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 -; GFX10-NEXT: v_and_b32_e32 v6, 1, v3 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v6 -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s6, s6, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s6 +; GFX10-NEXT: s_xor_b32 s5, s5, 1 +; GFX10-NEXT: s_add_i32 s6, s6, 1 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0 +; GFX10-NEXT: 
s_or_b32 s4, vcc_lo, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB1_1 ; GFX10-NEXT: ; %bb.2: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_cmp_lg_u32 s5, 0 +; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4 ; GFX10-NEXT: flat_store_dword v[1:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -98,60 +91,60 @@ exit: define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %a.break) { ; GFX10-LABEL: loop_with_1break: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: s_mov_b32 s8, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: v_mov_b32_e32 v4, s1 -; GFX10-NEXT: v_mov_b32_e32 v3, s0 -; GFX10-NEXT: v_mov_b32_e32 v5, s4 -; GFX10-NEXT: ; implicit-def: $sgpr0 -; GFX10-NEXT: ; implicit-def: $sgpr1 +; GFX10-NEXT: ; implicit-def: $sgpr9 ; GFX10-NEXT: s_branch .LBB2_3 ; GFX10-NEXT: .LBB2_1: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB2_3 Depth=1 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v5 -; GFX10-NEXT: v_cmp_lt_u32_e32 vcc_lo, v5, v2 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: global_load_dword v8, v[6:7], off +; GFX10-NEXT: v_mov_b32_e32 v4, s6 +; GFX10-NEXT: v_mov_b32_e32 v5, s7 +; GFX10-NEXT: s_andn2_b32 s6, s9, exec_lo ; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: v_mov_b32_e32 v5, v9 -; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s1, s1, s6 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo +; GFX10-NEXT: v_cmp_lt_u32_e32 vcc_lo, s4, v2 +; GFX10-NEXT: s_add_i32 s4, s4, 1 +; GFX10-NEXT: global_load_dword v6, v[4:5], off +; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo +; GFX10-NEXT: s_or_b32 s9, s6, s7 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 -; GFX10-NEXT: global_store_dword v[6:7], v8, off +; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v6 +; GFX10-NEXT: global_store_dword v[4:5], v6, off ; GFX10-NEXT: .LBB2_2: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB2_3 Depth=1 -; GFX10-NEXT: s_and_b32 s5, 1, s5 -; GFX10-NEXT: s_and_b32 s6, exec_lo, s1 -; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, s5 -; GFX10-NEXT: s_or_b32 s4, s6, s4 -; GFX10-NEXT: s_andn2_b32 s0, s0, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 -; GFX10-NEXT: s_or_b32 s0, s0, s5 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_cmp_lg_u32 s5, 0 +; GFX10-NEXT: s_cselect_b32 s5, exec_lo, 0 +; GFX10-NEXT: s_and_b32 s6, exec_lo, s9 +; GFX10-NEXT: s_or_b32 s8, s6, s8 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execz .LBB2_5 ; GFX10-NEXT: .LBB2_3: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, -1 -; GFX10-NEXT: s_or_b32 s1, s1, s5 -; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6] -; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v3, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v4, v7, vcc_lo -; GFX10-NEXT: global_load_dword v8, v[8:9], off +; GFX10-NEXT: s_ashr_i32 s5, s4, 31 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[4:5], 2 +; GFX10-NEXT: s_add_u32 s10, s0, s6 +; GFX10-NEXT: s_addc_u32 s11, s1, s7 +; GFX10-NEXT: 
global_load_dword v4, v3, s[10:11]
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: s_cbranch_vccnz .LBB2_1
+; GFX10-NEXT: v_readfirstlane_b32 s5, v4
+; GFX10-NEXT: s_cmp_lg_u32 s5, 0
+; GFX10-NEXT: s_cselect_b32 s5, 1, 0
+; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo
+; GFX10-NEXT: s_and_b32 s10, exec_lo, exec_lo
+; GFX10-NEXT: s_or_b32 s9, s9, s10
+; GFX10-NEXT: s_cmp_lg_u32 s5, 0
+; GFX10-NEXT: s_cbranch_scc1 .LBB2_1
 ; GFX10-NEXT: ; %bb.4: ; in Loop: Header=BB2_3 Depth=1
 ; GFX10-NEXT: s_mov_b32 s5, 1
-; GFX10-NEXT: ; implicit-def: $vgpr5
+; GFX10-NEXT: ; implicit-def: $sgpr4
 ; GFX10-NEXT: s_branch .LBB2_2
 ; GFX10-NEXT: .LBB2_5: ; %loop.exit.guard
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_and_saveexec_b32 s1, s0
-; GFX10-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
+; GFX10-NEXT: s_and_saveexec_b32 s0, s5
+; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
 ; GFX10-NEXT: s_cbranch_execz .LBB2_7
 ; GFX10-NEXT: ; %bb.6: ; %break.body
 ; GFX10-NEXT: v_mov_b32_e32 v0, 10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
index fb436623bed2d..e9a415c3da7ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
@@ -15,39 +15,30 @@ body: |
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI2]](s1), [[C5]], [[C4]]
 ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
@@ -58,15 +49,15 @@ body: |
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
 %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
 bb.1:
 successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
 %12:_(s1) = G_CONSTANT i1 true
 %11:_(s1) = G_XOR %10, %12
 %13:_(s32) = G_UITOFP %8(s32)
@@ -78,13 +69,11 @@ body: |
 G_BR %bb.2
 bb.2:
- %16:_(s1) = G_PHI %10(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %10(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
 SI_RETURN
 ...
@@ -102,39 +91,30 @@ body: |
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
 ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
@@ -145,15 +125,15 @@ body: |
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
 %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
 bb.1:
 successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
 %12:_(s1) = G_CONSTANT i1 true
 %11:_(s1) = G_XOR %10, %12
 %13:_(s32) = G_UITOFP %8(s32)
@@ -165,13 +145,11 @@ body: |
 G_BR %bb.2
 bb.2:
- %16:_(s1) = G_PHI %11(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %11(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
 SI_RETURN
 ...
@@ -195,33 +173,30 @@ body: |
 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr2
 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr3
 ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF2]](s1), %bb.0, %53(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %42(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
 ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
 ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
 ; GFX10-NEXT: G_BR %bb.5
 ; GFX10-NEXT: {{ $}}
@@ -235,19 +210,19 @@ body: |
 ; GFX10-NEXT: bb.3:
 ; GFX10-NEXT: successors: %bb.5(0x80000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
 ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
 ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
 ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C7]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C6]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY2]]
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
 ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
 ; GFX10-NEXT: G_BR %bb.5
 ; GFX10-NEXT: {{ $}}
@@ -258,25 +233,19 @@ body: |
 ; GFX10-NEXT: bb.5:
 ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C7]](s1), %bb.3, [[C3]](s1), %bb.1
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
 ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.6
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.6:
 ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_2]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX10-NEXT: G_BR %bb.2
 bb.0:
 successors: %bb.1(0x80000000)
@@ -292,23 +261,23 @@ body: |
 %7:_(s32) = COPY $sgpr2
 %8:_(s32) = COPY $sgpr3
 %9:_(p1) = G_MERGE_VALUES %7(s32), %8(s32)
- %10:_(s32) = G_CONSTANT i32 0
- %11:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_IMPLICIT_DEF
+ %11:_(s32) = G_CONSTANT i32 0
 bb.1:
 successors: %bb.3(0x50000000), %bb.5(0x30000000)
- %12:_(s32) = G_PHI %13(s32), %bb.5, %10(s32), %bb.0
- %14:_(s32) = G_PHI %10(s32), %bb.0, %15(s32), %bb.5
- %16:_(s1) = G_CONSTANT i1 true
- %17:_(s64) = G_SEXT %14(s32)
- %18:_(s32) = G_CONSTANT i32 2
- %19:_(s64) = G_SHL %17, %18(s32)
- %20:_(p1) = G_PTR_ADD %6, %19(s64)
- %21:_(s32) = G_LOAD %20(p1) :: (load (s32), addrspace 1)
- %22:_(s32) = G_CONSTANT i32 0
- %23:_(s1) = G_ICMP intpred(ne), %21(s32), %22
- G_BRCOND %23(s1), %bb.3
+ %12:_(s32) = G_PHI %13(s32), %bb.5, %11(s32), %bb.0
+ %14:_(s32) = G_PHI %11(s32), %bb.0, %15(s32), %bb.5
+ %16:_(s64) = G_SEXT %14(s32)
+ %17:_(s32) = G_CONSTANT i32 2
+ %18:_(s64) = G_SHL %16, %17(s32)
+ %19:_(p1) = G_PTR_ADD %6, %18(s64)
+ %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
+ %21:_(s32) = G_CONSTANT i32 0
+ %22:_(s1) = G_ICMP intpred(ne), %20(s32), %21
+ %23:_(s1) = G_CONSTANT i1 true
+ G_BRCOND %22(s1), %bb.3
 G_BR %bb.5
 bb.2:
@@ -321,16 +290,16 @@ body: |
 bb.3:
 successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %17, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %14, %30
- %33:_(s1) = G_ICMP intpred(ult), %14(s32), %3
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %16, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %14, %29
+ %32:_(s1) = G_ICMP intpred(ult), %14(s32), %3
+ %33:_(s1) = G_CONSTANT i1 false
 G_BR %bb.5
 bb.4:
@@ -340,9 +309,9 @@ body: |
 bb.5:
 successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %15:_(s32) = G_PHI %32(s32), %bb.3, %11(s32), %bb.1
- %35:_(s1) = G_PHI %25(s1), %bb.3, %16(s1), %bb.1
- %36:_(s1) = G_PHI %33(s1), %bb.3, %16(s1), %bb.1
+ %15:_(s32) = G_PHI %31(s32), %bb.3, %10(s32), %bb.1
+ %35:sreg_32_xm0_xexec(s1) = G_PHI %33(s1), %bb.3, %23(s1), %bb.1
+ %36:_(s1) = G_PHI %32(s1), %bb.3, %23(s1), %bb.1
 %13:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %12(s32)
 SI_LOOP %13(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
 G_BR %bb.6
@@ -350,9 +319,7 @@ body: |
 bb.6:
 successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %37:sreg_32_xm0_xexec(s1) = G_PHI %35(s1), %bb.5
- %38:_(s32) = G_PHI %13(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32)
- %34:sreg_32_xm0_xexec(s32) = SI_IF %37(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s32)
+ %34:sreg_32_xm0_xexec(s32) = SI_IF %35(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
 G_BR %bb.2
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
index 1934958ea8f37..6f1797455c6a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
@@ -1,24 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
 define void @temporal_divergent_i32(float %val, ptr %addr) {
 ; GFX10-LABEL: temporal_divergent_i32:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_mov_b32 s4, -1
-; GFX10-NEXT: v_mov_b32_e32 v3, s4
-; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: s_mov_b32 s5, 0
 ; GFX10-NEXT: .LBB0_1: ; %loop
 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 1, v3
-; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v3
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0
-; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
-; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_add_i32 s4, s4, 1
+; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s4
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0
+; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
 ; GFX10-NEXT: s_cbranch_execnz .LBB0_1
 ; GFX10-NEXT: ; %bb.2: ; %exit
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: flat_store_dword v[1:2], v3
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: flat_store_dword v[1:2], v0
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
index d1b473f2f41d8..996815e2d38fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
@@ -15,14 +15,14 @@ body: |
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.1:
 ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]]
 ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32)
@@ -32,10 +32,8 @@ body: |
 ; GFX10-NEXT: G_BR %bb.2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
- ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[MV]](p0) :: (store (s32))
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p0) :: (store (s32))
 ; GFX10-NEXT: SI_RETURN
 bb.0:
 successors: %bb.1(0x80000000)
@@ -45,14 +43,14 @@ body: |
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
 %3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s32) = G_CONSTANT i32 0
- %5:_(s32) = G_CONSTANT i32 -1
+ %4:_(s32) = G_CONSTANT i32 -1
+ %5:_(s32) = G_CONSTANT i32 0
 bb.1:
 successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
 %10:_(s32) = G_CONSTANT i32 1
 %9:_(s32) = G_ADD %8, %10
 %11:_(s32) = G_UITOFP %9(s32)
@@ -62,9 +60,7 @@ body: |
 G_BR %bb.2
 bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- G_STORE %13(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ G_STORE %9(s32), %3(p0) :: (store (s32))
 SI_RETURN
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
index 8f3495ea87eec..97dcd5084cacc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
@@ -597,7 +597,7 @@ body: |
 ; CHECK-NEXT: bb.1:
 ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.1, [[C1]](s32), %bb.0
 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]]
@@ -609,11 +609,9 @@ body: |
 ; CHECK-NEXT: G_BR %bb.2
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[C3]]
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[ADD]], [[C3]]
 ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
 ; CHECK-NEXT: S_ENDPGM 0
 bb.0:
@@ -641,12 +639,10 @@ body: |
 G_BR %bb.2
 bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- %15:_(s32) = G_CONSTANT i32 10
- %16:_(s32) = G_MUL %13, %15
- G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %13:_(s32) = G_CONSTANT i32 10
+ %14:_(s32) = G_MUL %9, %13
+ G_STORE %14(s32), %3(p1) :: (store (s32), addrspace 1)
 S_ENDPGM 0
 ...
@@ -677,7 +673,7 @@ body: |
 ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %67(s32), %bb.3, [[C]](s32), %bb.0
 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[PHI2]](s32)
@@ -760,8 +756,7 @@ body: |
 ; CHECK-NEXT: G_BR %bb.3
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; CHECK-NEXT: S_ENDPGM 0
 bb.0:
 successors: %bb.1(0x80000000)
@@ -861,7 +856,6 @@ body: |
 G_BR %bb.3
 bb.6:
- %64:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
 S_ENDPGM 0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 191739b37672e..a02e623efb3a3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -462,8 +462,7 @@ define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, p
 ; NEW_RBS-NEXT: s_cbranch_execnz .LBB15_1
 ; NEW_RBS-NEXT: ; %bb.2: ; %exit
 ; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; NEW_RBS-NEXT: v_mov_b32_e32 v0, s0
-; NEW_RBS-NEXT: v_mul_lo_u32 v0, v0, 10
+; NEW_RBS-NEXT: v_mul_lo_u32 v0, s0, 10
 ; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
 ; NEW_RBS-NEXT: s_endpgm
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
index 1b22ee4b3fffc..06b0b7269b224 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
@@ -971,12 +971,10 @@ body: |
 ; OLD_RBS-NEXT: G_BR %bb.2
 ; OLD_RBS-NEXT: {{ $}}
 ; OLD_RBS-NEXT: bb.2:
- ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
 ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
- ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]]
+ ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[ADD]], [[COPY4]]
 ; OLD_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
 ; OLD_RBS-NEXT: S_ENDPGM 0
 ;
@@ -995,7 +993,7 @@ body: |
 ; NEW_RBS-NEXT: bb.1:
 ; NEW_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; NEW_RBS-NEXT: {{ $}}
- ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0
+ ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.1, [[C1]](s32), %bb.0
 ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
 ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
 ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]]
@@ -1008,12 +1006,11 @@ body: |
 ; NEW_RBS-NEXT: G_BR %bb.2
 ; NEW_RBS-NEXT: {{ $}}
 ; NEW_RBS-NEXT: bb.2:
- ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
- ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
- ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY5]]
+ ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+ ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
+ ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY5]], [[COPY6]]
 ; NEW_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
 ; NEW_RBS-NEXT: S_ENDPGM 0
 bb.0:
@@ -1041,12 +1038,10 @@ body: |
 G_BR %bb.2
 bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- %15:_(s32) = G_CONSTANT i32 10
- %16:_(s32) = G_MUL %13, %15
- G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %13:_(s32) = G_CONSTANT i32 10
+ %14:_(s32) = G_MUL %9, %13
+ G_STORE %14(s32), %3(p1) :: (store (s32), addrspace 1)
 S_ENDPGM 0
 ...
@@ -1167,8 +1162,7 @@ body: |
 ; OLD_RBS-NEXT: G_BR %bb.3
 ; OLD_RBS-NEXT: {{ $}}
 ; OLD_RBS-NEXT: bb.6:
- ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; OLD_RBS-NEXT: S_ENDPGM 0
 ;
 ; NEW_RBS-LABEL: name: loop_with_2breaks
@@ -1193,7 +1187,7 @@ body: |
 ; NEW_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
 ; NEW_RBS-NEXT: {{ $}}
 ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
- ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0
+ ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %67(s32), %bb.3, [[C]](s32), %bb.0
 ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
 ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
 ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
@@ -1285,8 +1279,7 @@ body: |
 ; NEW_RBS-NEXT: G_BR %bb.3
 ; NEW_RBS-NEXT: {{ $}}
 ; NEW_RBS-NEXT: bb.6:
- ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
 ; NEW_RBS-NEXT: S_ENDPGM 0
 bb.0:
 successors: %bb.1(0x80000000)
@@ -1386,7 +1379,6 @@ body: |
 G_BR %bb.3
 bb.6:
- %64:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
 S_ENDPGM 0
 ...