diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 1cb0ba2fb36d7..3434f54534ef5 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -945,12 +945,12 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 
   bool isMemcpyOp = Opc == BPF::MEMCPY;
 
-#ifndef NDEBUG
   bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                        Opc == BPF::Select_Ri_64_32 ||
                        Opc == BPF::Select_Ri_32 ||
                        Opc == BPF::Select_Ri_32_64);
 
+#ifndef NDEBUG
   if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
     report_fatal_error("unhandled instruction type: " + Twine(Opc));
 #endif
@@ -1016,6 +1016,25 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     report_fatal_error("unimplemented select CondCode " + Twine(CC));
   }
 
+  // Fold select(SETEQ, AND(a,b), 0, T, F) into JNE+swapped PHI so the
+  // existing AND+JNE→JSET peephole can eliminate the AND.
+  bool SwapSelectPHI = false;
+  if (CC == ISD::SETEQ && isSelectRIOp && !is32BitCmp) {
+    int64_t CheckImm = MI.getOperand(2).getImm();
+    if (CheckImm == 0) {
+      Register LHSOrig = MI.getOperand(1).getReg();
+      MachineRegisterInfo &MRI = F->getRegInfo();
+      MachineInstr *DefMI = MRI.hasOneDef(LHSOrig)
+                                ? &*MRI.def_instr_begin(LHSOrig)
+                                : nullptr;
+      if (DefMI && DefMI->getParent() == BB &&
+          (DefMI->getOpcode() == BPF::AND_rr ||
+           DefMI->getOpcode() == BPF::AND_ri)) {
+        NewCC = BPF::JNE_ri;
+        SwapSelectPHI = true;
+      }
+    }
+  }
   Register LHS = MI.getOperand(1).getReg();
   bool isSignedCmp = (CC == ISD::SETGT ||
                       CC == ISD::SETGE ||
@@ -1061,9 +1080,9 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   // ...
   BB = Copy1MBB;
   BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
-      .addReg(MI.getOperand(5).getReg())
+      .addReg(MI.getOperand(SwapSelectPHI ? 4 : 5).getReg())
       .addMBB(Copy0MBB)
-      .addReg(MI.getOperand(4).getReg())
+      .addReg(MI.getOperand(SwapSelectPHI ? 5 : 4).getReg())
       .addMBB(ThisMBB);
 
   MI.eraseFromParent(); // The pseudo instruction is gone now.
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index 8a7aad60a7a59..060b39ba2c1b4 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -396,18 +396,55 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
 
 bool BPFMIPreEmitPeephole::foldBitTestBranchIntoJSet() {
   bool Changed = false;
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   for (MachineBasicBlock &MBB : *MF) {
-    auto getPrevNonDebugInstr = [&](MachineInstr &MI) -> MachineInstr * {
-      auto It = MI.getIterator();
-      while (It != MBB.begin()) {
+    // Scan back from BrMI to find the AND defining DstReg. Instructions
+    // between the AND and the branch are skipped only if they neither read
+    // nor write DstReg (the fold deletes the AND, so a reader would see the
+    // unmasked value) and do not clobber the AND's source register. Register
+    // aliases (e.g. w1 vs r1) are handled via regsOverlap.
+    auto findAndDefForJSet = [&](MachineInstr &BrMI,
+                                 Register DstReg) -> MachineInstr * {
+      for (auto It = BrMI.getIterator(); It != MBB.begin();) {
         --It;
-        if (!It->isDebugInstr())
-          return &*It;
+        if (It->isDebugInstr())
+          continue;
+        bool DefsDstReg = false;
+        bool ReadsDstReg = false;
+        for (const MachineOperand &MO : It->operands()) {
+          if (!MO.isReg() || !TRI->regsOverlap(MO.getReg(), DstReg))
+            continue;
+          if (MO.isDef())
+            DefsDstReg = true;
+          else
+            ReadsDstReg = true;
+        }
+        if (!DefsDstReg) {
+          if (ReadsDstReg)
+            return nullptr;
+          continue;
+        }
+        unsigned Opc = It->getOpcode();
+        if (Opc != BPF::AND_rr && Opc != BPF::AND_rr_32 &&
+            Opc != BPF::AND_ri && Opc != BPF::AND_ri_32)
+          return nullptr;
+        if (Opc == BPF::AND_rr || Opc == BPF::AND_rr_32) {
+          Register SrcReg = It->getOperand(2).getReg();
+          for (auto Check = std::next(It);
+               Check != BrMI.getIterator(); ++Check) {
+            if (Check->isDebugInstr())
+              continue;
+            for (const MachineOperand &CMO : Check->operands())
+              if (CMO.isReg() && CMO.isDef() &&
+                  TRI->regsOverlap(CMO.getReg(), SrcReg))
+                return nullptr;
+          }
+        }
+        return &*It;
       }
       return nullptr;
     };
-
     for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
       MachineInstr &BrMI = *MII++;
 
@@ -440,7 +477,7 @@ bool BPFMIPreEmitPeephole::foldBitTestBranchIntoJSet() {
       if (!DstReg.isPhysical() ||
           isPhysRegUsedAfter(DstReg, BrMI.getIterator()))
         continue;
-      MachineInstr *AndMI = getPrevNonDebugInstr(BrMI);
+      MachineInstr *AndMI = findAndDefForJSet(BrMI, DstReg);
       if (!AndMI || AndMI->getParent() != &MBB ||
           !AndMI->getOperand(0).isReg() || !AndMI->getOperand(1).isReg())
         continue;
diff --git a/llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll b/llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll
index 9998c985e2aee..59605ec59c6ce 100644
--- a/llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll
+++ b/llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll
@@ -66,9 +66,12 @@ lor.end: ; preds = %lor.end.critedge, %
 }
 
 ; CHECK: r[[LOAD:[0-9]+]] = *(u32 *)(r{{[0-9]+}} + 4)
-; CHECK: r[[COPY:[0-9]+]] = r[[LOAD]]
-; CHECK: r[[COPY]] &= 65536
-; CHECK: r[[LOAD]] &= 32768
+; CHECK: if r[[LOAD]] & 65536 goto [[LABEL1:LBB[0-9]+_[0-9]+]]
+; CHECK: if r[[LOAD]] & 32768 goto [[LABEL2:LBB[0-9]+_[0-9]+]]
+; CHECK: [[LABEL2]]:
+; CHECK: r{{[0-9]+}} = *(u32 *)(r{{[0-9]+}} + 0)
+; CHECK: [[LABEL1]]:
+; CHECK: *(u32 *)(r10 - 4) =
 
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
diff --git a/llvm/test/CodeGen/BPF/jset_select.ll b/llvm/test/CodeGen/BPF/jset_select.ll
new file mode 100644
index 0000000000000..2906f3e6c7f3b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/jset_select.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=bpf -mcpu=v2 -verify-machineinstrs | FileCheck %s
+;
+; Test that the JSET peephole optimisation fires for the select IR pattern.
+; When both branch outcomes are compile-time constants, LLVM emits select
+; instead of br. This should fold AND + JNE into JSET, eliminating the AND.
+;
+; Source:
+;   uint64_t select_jset(uint64_t x, uint64_t m) {
+;     return (x & m) == 0 ? 64 : 32;
+;   }
+;
+;   uint32_t select_jset_32_zero(uint32_t x, uint32_t m) {
+;     return (x & m) == 0 ? 64 : 32;
+;   }
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @select_jset(i64 %0, i64 %1) {
+  %3 = and i64 %1, %0
+  %4 = icmp eq i64 %3, 0
+  %5 = select i1 %4, i64 64, i64 32
+; CHECK-LABEL: select_jset:
+; CHECK-NOT: &=
+; CHECK: r0 = 32
+; CHECK-NEXT: if r{{[0-9]+}} & r{{[0-9]+}} goto [[LABEL:LBB0_[0-9]+]]
+; CHECK: r0 = 64
+; CHECK-NEXT: [[LABEL]]:
+; CHECK-NEXT: exit
+  ret i64 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i32 @select_jset_32_zero(i32 %0, i32 %1) {
+  %3 = and i32 %1, %0
+  %4 = icmp eq i32 %3, 0
+  %5 = select i1 %4, i32 64, i32 32
+; CHECK-LABEL: select_jset_32_zero:
+; CHECK-NOT: if r{{[0-9]+}} & r{{[0-9]+}} goto
+; CHECK: r{{[0-9]+}} &= r{{[0-9]+}}
+; CHECK-NEXT: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
+; CHECK-NEXT: r0 = 64
+; CHECK-NEXT: if r{{[0-9]+}} == 0 goto [[LABEL32:LBB[0-9]+_[0-9]+]]
+; CHECK: r0 = 32
+; CHECK-NEXT: [[LABEL32]]:
+; CHECK-NEXT: exit
+  ret i32 %5
+}
diff --git a/llvm/test/CodeGen/BPF/setcc.ll b/llvm/test/CodeGen/BPF/setcc.ll
index 0e6103e3f3b05..66877fb8c2ccc 100644
--- a/llvm/test/CodeGen/BPF/setcc.ll
+++ b/llvm/test/CodeGen/BPF/setcc.ll
@@ -8,8 +8,18 @@ define i16 @sccweqand(i16 %a, i16 %b) nounwind {
   ret i16 %t3
 }
 ; CHECK-LABEL: sccweqand:
-; CHECK-V1: if r1 == 0
-; CHECK-V2: if r1 == 0
+; CHECK-V1: r0 = 0
+; CHECK-V1-NEXT: if r1 & 65535 goto [[LABEL_EQ:LBB[0-9]+_[0-9]+]]
+; CHECK-V1-NEXT: # %bb.1:
+; CHECK-V1-NEXT: r0 = 1
+; CHECK-V1-NEXT: [[LABEL_EQ]]:
+; CHECK-V1-NEXT: exit
+; CHECK-V2: r0 = 0
+; CHECK-V2-NEXT: if r1 & 65535 goto [[LABEL_EQ2:LBB[0-9]+_[0-9]+]]
+; CHECK-V2-NEXT: # %bb.1:
+; CHECK-V2-NEXT: r0 = 1
+; CHECK-V2-NEXT: [[LABEL_EQ2]]:
+; CHECK-V2-NEXT: exit
 
 define i16 @sccwneand(i16 %a, i16 %b) nounwind {
   %t1 = and i16 %a, %b
@@ -18,8 +28,18 @@ define i16 @sccwneand(i16 %a, i16 %b) nounwind {
   ret i16 %t3
 }
 ; CHECK-LABEL: sccwneand:
-; CHECK-V1: if r1 != 0
-; CHECK-V2: if r1 != 0
+; CHECK-V1: r0 = 1
+; CHECK-V1-NEXT: if r1 & 65535 goto [[LABEL_NE:LBB[0-9]+_[0-9]+]]
+; CHECK-V1-NEXT: # %bb.1:
+; CHECK-V1-NEXT: r0 = 0
+; CHECK-V1-NEXT: [[LABEL_NE]]:
+; CHECK-V1-NEXT: exit
+; CHECK-V2: r0 = 1
+; CHECK-V2-NEXT: if r1 & 65535 goto [[LABEL_NE2:LBB[0-9]+_[0-9]+]]
+; CHECK-V2-NEXT: # %bb.1:
+; CHECK-V2-NEXT: r0 = 0
+; CHECK-V2-NEXT: [[LABEL_NE2]]:
+; CHECK-V2-NEXT: exit
 
 define i16 @sccwne(i16 %a, i16 %b) nounwind {
   %t1 = icmp ne i16 %a, %b
diff --git a/llvm/test/CodeGen/MIR/BPF/jset-select-clobber.mir b/llvm/test/CodeGen/MIR/BPF/jset-select-clobber.mir
new file mode 100644
index 0000000000000..438d3afe61a7c
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/BPF/jset-select-clobber.mir
@@ -0,0 +1,30 @@
+# RUN: llc -o - %s -mtriple=bpf -run-pass=bpf-mi-pemit-peephole | FileCheck %s
+#
+# Verify that the fold does not fire when an intervening instruction clobbers
+# the AND's source register. The AND and JNE must survive unchanged.
+#
+--- |
+  define i64 @select_jset_clobber(i64 %0, i64 %1) { ret i64 0 }
+...
+---
+name: select_jset_clobber
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $r1, $r2
+    $r2 = AND_rr $r2, $r1
+    $r1 = MOV_ri 0
+    $r0 = MOV_ri 32
+    JNE_ri killed $r2, 0, %bb.2
+  bb.1:
+    successors: %bb.2
+    $r0 = MOV_ri 64
+  bb.2:
+    liveins: $r0
+    RET implicit $r0
+# CHECK-LABEL: name: select_jset_clobber
+# CHECK: $r2 = AND_rr $r2, $r1
+# CHECK: $r1 = MOV_ri 0
+# CHECK: JNE_ri killed $r2, 0, %bb.2
+# CHECK-NOT: JSET_rr
diff --git a/llvm/test/CodeGen/MIR/BPF/jset-select-ri.mir b/llvm/test/CodeGen/MIR/BPF/jset-select-ri.mir
new file mode 100644
index 0000000000000..bfbd4d7980f17
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/BPF/jset-select-ri.mir
@@ -0,0 +1,27 @@
+# RUN: llc -o - %s -mtriple=bpf -run-pass=bpf-mi-pemit-peephole | FileCheck %s
+#
+# Verify that AND_ri + JNE_ri folds into JSET_ri when the mask is an immediate.
+#
+--- |
+  define i64 @select_jset_ri(i64 %0) { ret i64 0 }
+...
+---
+name: select_jset_ri
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $r1
+    $r1 = AND_ri killed $r1, 255
+    $r0 = MOV_ri 32
+    JNE_ri killed $r1, 0, %bb.2
+  bb.1:
+    successors: %bb.2
+    $r0 = MOV_ri 64
+  bb.2:
+    liveins: $r0
+    RET implicit $r0
+# CHECK-LABEL: name: select_jset_ri
+# CHECK-NOT: AND_ri
+# CHECK: $r0 = MOV_ri 32
+# CHECK-NEXT: JSET_ri $r1, 255, %bb.2
diff --git a/llvm/test/CodeGen/MIR/BPF/jset-select-rr.mir b/llvm/test/CodeGen/MIR/BPF/jset-select-rr.mir
new file mode 100644
index 0000000000000..7e32e2112bef1
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/BPF/jset-select-rr.mir
@@ -0,0 +1,29 @@
+# RUN: llc -o - %s -mtriple=bpf -run-pass=bpf-mi-pemit-peephole | FileCheck %s
+#
+# Verify that AND_rr + JNE_ri folds into JSET_rr for the select IR path.
+# The AND should be eliminated and JSET emitted in its place.
+# CHECK-NOT comes before the MOV check: a surviving AND would print ahead of it.
+#
+--- |
+  define i64 @select_jset(i64 %0, i64 %1) { ret i64 0 }
+...
+---
+name: select_jset
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $r1, $r2
+    $r2 = AND_rr killed $r2, killed $r1
+    $r0 = MOV_ri 32
+    JNE_ri killed $r2, 0, %bb.2
+  bb.1:
+    successors: %bb.2
+    $r0 = MOV_ri 64
+  bb.2:
+    liveins: $r0
+    RET implicit $r0
+# CHECK-LABEL: name: select_jset
+# CHECK-NOT: AND_rr
+# CHECK: $r0 = MOV_ri 32
+# CHECK-NEXT: JSET_rr $r2, $r1, %bb.2