Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions llvm/lib/Target/BPF/BPFISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -945,12 +945,12 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,

bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
bool isSelectRIOp = (Opc == BPF::Select_Ri ||
Opc == BPF::Select_Ri_64_32 ||
Opc == BPF::Select_Ri_32 ||
Opc == BPF::Select_Ri_32_64);

#ifndef NDEBUG
if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif
Expand Down Expand Up @@ -1016,6 +1016,25 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
report_fatal_error("unimplemented select CondCode " + Twine(CC));
}

// Fold select(SETEQ, AND(a,b), 0, T, F) into JNE+swapped PHI so the
// existing AND+JNE→JSET peephole can eliminate the AND.
bool SwapSelectPHI = false;
if (CC == ISD::SETEQ && isSelectRIOp && !is32BitCmp) {
int64_t CheckImm = MI.getOperand(2).getImm();
if (CheckImm == 0) {
Register LHSOrig = MI.getOperand(1).getReg();
MachineRegisterInfo &MRI = F->getRegInfo();
MachineInstr *DefMI = MRI.hasOneDef(LHSOrig)
? &*MRI.def_instr_begin(LHSOrig)
: nullptr;
if (DefMI && DefMI->getParent() == BB &&
(DefMI->getOpcode() == BPF::AND_rr ||
DefMI->getOpcode() == BPF::AND_ri)) {
NewCC = BPF::JNE_ri;
SwapSelectPHI = true;
}
}
}
Register LHS = MI.getOperand(1).getReg();
bool isSignedCmp = (CC == ISD::SETGT ||
CC == ISD::SETGE ||
Expand Down Expand Up @@ -1061,9 +1080,9 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// ...
BB = Copy1MBB;
BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
.addReg(MI.getOperand(5).getReg())
.addReg(MI.getOperand(SwapSelectPHI ? 4 : 5).getReg())
.addMBB(Copy0MBB)
.addReg(MI.getOperand(4).getReg())
.addReg(MI.getOperand(SwapSelectPHI ? 5 : 4).getReg())
.addMBB(ThisMBB);

MI.eraseFromParent(); // The pseudo instruction is gone now.
Expand Down
45 changes: 38 additions & 7 deletions llvm/lib/Target/BPF/BPFMIPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,18 +396,49 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov() {

bool BPFMIPreEmitPeephole::foldBitTestBranchIntoJSet() {
bool Changed = false;
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

for (MachineBasicBlock &MBB : *MF) {
auto getPrevNonDebugInstr = [&](MachineInstr &MI) -> MachineInstr * {
auto It = MI.getIterator();
while (It != MBB.begin()) {
// Scan back from BrMI to find the AND defining DstReg. We skip over
// non-defining instructions between the AND and the branch, but bail
// if any of them clobber the AND's source register. Register aliases
// (e.g. w1 vs r1) are handled via regsOverlap.
auto findAndDefForJSet = [&](MachineInstr &BrMI,
Register DstReg) -> MachineInstr * {
for (auto It = BrMI.getIterator(); It != MBB.begin();) {
--It;
if (!It->isDebugInstr())
return &*It;
if (It->isDebugInstr())
continue;
bool DefsDstReg = false;
for (const MachineOperand &MO : It->operands()) {
if (MO.isReg() && MO.isDef() &&
TRI->regsOverlap(MO.getReg(), DstReg)) {
DefsDstReg = true;
break;
}
}
if (!DefsDstReg)
continue;
unsigned Opc = It->getOpcode();
if (Opc != BPF::AND_rr && Opc != BPF::AND_rr_32 &&
Opc != BPF::AND_ri && Opc != BPF::AND_ri_32)
return nullptr;
if (Opc == BPF::AND_rr || Opc == BPF::AND_rr_32) {
Register SrcReg = It->getOperand(2).getReg();
for (auto Check = std::next(It);
Check != BrMI.getIterator(); ++Check) {
if (Check->isDebugInstr())
continue;
for (const MachineOperand &CMO : Check->operands())
if (CMO.isReg() && CMO.isDef() &&
TRI->regsOverlap(CMO.getReg(), SrcReg))
return nullptr;
}
}
return &*It;
}
return nullptr;
};

for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
MachineInstr &BrMI = *MII++;

Expand Down Expand Up @@ -440,7 +471,7 @@ bool BPFMIPreEmitPeephole::foldBitTestBranchIntoJSet() {
if (!DstReg.isPhysical() || isPhysRegUsedAfter(DstReg, BrMI.getIterator()))
continue;

MachineInstr *AndMI = getPrevNonDebugInstr(BrMI);
MachineInstr *AndMI = findAndDefForJSet(BrMI, DstReg);
if (!AndMI || AndMI->getParent() != &MBB || !AndMI->getOperand(0).isReg() ||
!AndMI->getOperand(1).isReg())
continue;
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ lor.end: ; preds = %lor.end.critedge, %
}

; CHECK: r[[LOAD:[0-9]+]] = *(u32 *)(r{{[0-9]+}} + 4)
; CHECK: r[[COPY:[0-9]+]] = r[[LOAD]]
; CHECK: r[[COPY]] &= 65536
; CHECK: r[[LOAD]] &= 32768
; CHECK: if r[[LOAD]] & 65536 goto
; CHECK: if r[[LOAD]] & 32768 goto

; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/BPF/jset_select.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; RUN: llc < %s -mtriple=bpf -mcpu=v2 -verify-machineinstrs | FileCheck %s
;
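; Test that the JSET peephole optimisation fires for the select IR pattern.
; When both branch outcomes are compile-time constants, LLVM emits select
; instead of br. This should fold AND + JNE into JSET, eliminating the AND.
; The 32-bit variant (select_jset_32_zero) is a negative test: its CHECK lines
; expect the AND and the `== 0` compare to remain.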
;
; Source:
; uint64_t select_jset(uint64_t x, uint64_t m) {
; return (x & m) == 0 ? 64 : 32;
; }
;
; uint32_t select_jset_32_zero(uint32_t x, uint32_t m) {
; return (x & m) == 0 ? 64 : 32;
; }

; Function Attrs: norecurse nounwind readnone
define i64 @select_jset(i64 %0, i64 %1) {
  %3 = and i64 %1, %0
  %4 = icmp eq i64 %3, 0
  %5 = select i1 %4, i64 64, i64 32
; 64-bit select on (x & m) == 0: the AND must be folded into a JSET branch.
; FileCheck applies a CHECK-NOT only to the text between the surrounding
; positive matches, so the negative checks are placed before the first match
; and in the one gap that is not pinned by CHECK-NEXT. A CHECK-NOT after the
; final `exit` would only scan text following the function body.
; CHECK-LABEL: select_jset:
; CHECK-NOT: &=
; CHECK: r0 = 32
; CHECK-NEXT: if r{{[0-9]+}} & r{{[0-9]+}} goto [[LABEL:LBB0_[0-9]+]]
; CHECK-NOT: &=
; CHECK: r0 = 64
; CHECK-NEXT: [[LABEL]]:
; CHECK-NEXT: exit
  ret i64 %5
}

; Function Attrs: norecurse nounwind readnone
define i32 @select_jset_32_zero(i32 %0, i32 %1) {
  %3 = and i32 %1, %0
  %4 = icmp eq i32 %3, 0
  %5 = select i1 %4, i32 64, i32 32
; Negative test: for the 32-bit compare the AND and the `== 0` branch are
; expected to remain, and no JSET-style branch may be emitted. The CHECK-NOT
; directives are placed before the first positive match and in the only gap
; not pinned by CHECK-NEXT, because a CHECK-NOT after the final `exit` would
; only scan text following the function body.
; CHECK-LABEL: select_jset_32_zero:
; CHECK-NOT: if r{{[0-9]+}} & r{{[0-9]+}} goto
; CHECK: r{{[0-9]+}} &= r{{[0-9]+}}
; CHECK-NEXT: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} >>= 32
; CHECK-NEXT: r0 = 64
; CHECK-NEXT: if r{{[0-9]+}} == 0 goto [[LABEL32:LBB[0-9]+_[0-9]+]]
; CHECK-NOT: if r{{[0-9]+}} & r{{[0-9]+}} goto
; CHECK: r0 = 32
; CHECK-NEXT: [[LABEL32]]:
; CHECK-NEXT: exit
  ret i32 %5
}
28 changes: 24 additions & 4 deletions llvm/test/CodeGen/BPF/setcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,18 @@ define i16 @sccweqand(i16 %a, i16 %b) nounwind {
ret i16 %t3
}
; CHECK-LABEL: sccweqand:
; CHECK-V1: if r1 == 0
; CHECK-V2: if r1 == 0
; CHECK-V1: r0 = 0
; CHECK-V1-NEXT: if r1 & 65535 goto [[LABEL_EQ:LBB[0-9]+_[0-9]+]]
; CHECK-V1-NEXT: # %bb.1:
; CHECK-V1-NEXT: r0 = 1
; CHECK-V1-NEXT: [[LABEL_EQ]]:
; CHECK-V1-NEXT: exit
; CHECK-V2: r0 = 0
; CHECK-V2-NEXT: if r1 & 65535 goto [[LABEL_EQ2:LBB[0-9]+_[0-9]+]]
; CHECK-V2-NEXT: # %bb.1:
; CHECK-V2-NEXT: r0 = 1
; CHECK-V2-NEXT: [[LABEL_EQ2]]:
; CHECK-V2-NEXT: exit

define i16 @sccwneand(i16 %a, i16 %b) nounwind {
%t1 = and i16 %a, %b
Expand All @@ -18,8 +28,18 @@ define i16 @sccwneand(i16 %a, i16 %b) nounwind {
ret i16 %t3
}
; CHECK-LABEL: sccwneand:
; CHECK-V1: if r1 != 0
; CHECK-V2: if r1 != 0
; CHECK-V1: r0 = 1
; CHECK-V1-NEXT: if r1 & 65535 goto [[LABEL_NE:LBB[0-9]+_[0-9]+]]
; CHECK-V1-NEXT: # %bb.1:
; CHECK-V1-NEXT: r0 = 0
; CHECK-V1-NEXT: [[LABEL_NE]]:
; CHECK-V1-NEXT: exit
; CHECK-V2: r0 = 1
; CHECK-V2-NEXT: if r1 & 65535 goto [[LABEL_NE2:LBB[0-9]+_[0-9]+]]
; CHECK-V2-NEXT: # %bb.1:
; CHECK-V2-NEXT: r0 = 0
; CHECK-V2-NEXT: [[LABEL_NE2]]:
; CHECK-V2-NEXT: exit

define i16 @sccwne(i16 %a, i16 %b) nounwind {
%t1 = icmp ne i16 %a, %b
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/BPF/warn-call.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; RUN: not llc -mtriple=bpfel < %s 2>&1 >/dev/null | FileCheck %s
; RUN: llc -mtriple=bpfel < %s | FileCheck %s

; CHECK: error: warn_call.c
; CHECK: built-in function 'memcpy'
; CHECK: call memcpy
define ptr @warn(ptr returned, ptr, i64) local_unnamed_addr #0 !dbg !6 {
tail call void @llvm.dbg.value(metadata ptr %0, i64 0, metadata !14, metadata !17), !dbg !18
tail call void @llvm.dbg.value(metadata ptr %1, i64 0, metadata !15, metadata !17), !dbg !19
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/MIR/BPF/jset-select-clobber.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# RUN: llc -o - %s -mtriple=bpf -run-pass=bpf-mi-pemit-peephole | FileCheck %s
#
# Verify that the fold does not fire when an intervening instruction clobbers
# the AND's source register. The AND and JNE must survive unchanged.
#
# The clobber in this input is `$r1 = MOV_ri 0`: it redefines $r1, the second
# operand of the AND_rr, between the AND and the JNE_ri. The positive JNE_ri
# check below would fail if the branch were rewritten.
#
--- |
define i64 @select_jset_clobber(i64 %0, i64 %1) { ret i64 0 }
...
---
name: select_jset_clobber
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.1, %bb.2
liveins: $r1, $r2
$r2 = AND_rr $r2, $r1
$r1 = MOV_ri 0
$r0 = MOV_ri 32
JNE_ri killed $r2, 0, %bb.2
bb.1:
successors: %bb.2
$r0 = MOV_ri 64
bb.2:
liveins: $r0
RET implicit $r0
# CHECK-LABEL: name: select_jset_clobber
# CHECK: $r2 = AND_rr $r2, $r1
# CHECK: $r1 = MOV_ri 0
# CHECK: JNE_ri killed $r2, 0, %bb.2
# CHECK-NOT: JSET_rr
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/MIR/BPF/jset-select-ri.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# RUN: llc -o - %s -mtriple=bpf -run-pass=bpf-mi-pemit-peephole | FileCheck %s
#
# Verify that AND_ri + JNE_ri folds into JSET_ri when the mask is an immediate.
#
# The AND_ri precedes the MOV_ri in the input, so a surviving AND would be
# printed before the first positive match. The CHECK-NOT directly after the
# label covers that region; the trailing CHECK-NOT covers the rest of the
# function.
#
--- |
  define i64 @select_jset_ri(i64 %0) { ret i64 0 }
...
---
name:            select_jset_ri
tracksRegLiveness: true
body:             |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $r1
    $r1 = AND_ri killed $r1, 255
    $r0 = MOV_ri 32
    JNE_ri killed $r1, 0, %bb.2
  bb.1:
    successors: %bb.2
    $r0 = MOV_ri 64
  bb.2:
    liveins: $r0
    RET implicit $r0
# CHECK-LABEL: name: select_jset_ri
# CHECK-NOT: AND_ri
# CHECK: $r0 = MOV_ri 32
# CHECK-NEXT: JSET_ri $r1, 255, %bb.2
# CHECK-NOT: AND_ri
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/MIR/BPF/jset-select-rr.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# RUN: llc -o - %s -mtriple=bpf -run-pass=bpf-mi-pemit-peephole | FileCheck %s
#
# Verify that AND_rr + JNE_ri folds into JSET_rr for the select IR path.
# The AND should be eliminated and JSET emitted in its place.
#
# The AND_rr precedes the MOV_ri in the input, so a surviving AND would be
# printed before the first positive match. The CHECK-NOT directly after the
# label covers that region; the trailing CHECK-NOT covers the rest of the
# function.
#
--- |
  define i64 @select_jset(i64 %0, i64 %1) { ret i64 0 }
...
---
name:            select_jset
tracksRegLiveness: true
body:             |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $r1, $r2
    $r2 = AND_rr killed $r2, killed $r1
    $r0 = MOV_ri 32
    JNE_ri killed $r2, 0, %bb.2
  bb.1:
    successors: %bb.2
    $r0 = MOV_ri 64
  bb.2:
    liveins: $r0
    RET implicit $r0
# CHECK-LABEL: name: select_jset
# CHECK-NOT: AND_rr
# CHECK: $r0 = MOV_ri 32
# CHECK-NEXT: JSET_rr $r2, $r1, %bb.2
# CHECK-NOT: AND_rr