Skip to content

Commit

Permalink
[SelectionDAG] Handle VSCALE in isKnownNeverZero (llvm#97789)
Browse files Browse the repository at this point in the history
VSCALE is by definition greater than zero, but this checks it via
getVScaleRange anyway.

The motivation for this is to be able to check if the EVL for a VP
strided load is non-zero in llvm#97394.

I added the tests to the RISC-V backend since the existing X86
known-never-zero.ll test crashed when trying to lower vscale for the
+sse2 RUN line.
  • Loading branch information
lukel97 committed Jul 5, 2024
1 parent b3fa2a6 commit e4b2842
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5623,6 +5623,15 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
case ISD::VSCALE: {
const Function &F = getMachineFunction().getFunction();
const APInt &Multiplier = Op.getConstantOperandAPInt(0);
ConstantRange CR =
getVScaleRange(&F, Op.getScalarValueSizeInBits()).multiply(Multiplier);
if (!CR.getUnsignedMin().isZero())
return true;
break;
}
}

return computeKnownBits(Op, Depth).isNonZero();
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s

; Use cttz to test if we properly prove never-zero. There is a very
; simple transform from cttz -> cttz_zero_undef if its operand is
; known never zero.

; Even without vscale_range, vscale is always guaranteed to be non-zero.
;
; The function carries no vscale_range attribute and passes the result of
; llvm.vscale directly to llvm.cttz with the second operand (is_zero_poison)
; set to false, i.e. the cttz result must be defined even for a zero input.
; The expected output below is one straight-line sequence with no branch on
; the vscale-derived value (csrr vlenb + srli).
; NOTE(review): presumably a zero-input guard would appear in the output if
; the operand were not proven non-zero -- confirm by regenerating the checks
; without the VSCALE case in isKnownNeverZero.
define i32 @vscale_known_nonzero() {
; CHECK-LABEL: vscale_known_nonzero:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: neg a1, a0
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: lui a1, 30667
; CHECK-NEXT: addiw a1, a1, 1329
; CHECK-NEXT: call __muldi3
; CHECK-NEXT: srliw a0, a0, 27
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI0_0)
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: lbu a0, 0(a0)
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%x = call i32 @llvm.vscale()
%r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
ret i32 %r
}

0 comments on commit e4b2842

Please sign in to comment.