[RISCV] Support non-power-of-2 types when expanding memcmp #114971
base: users/wangpc-pp/spr/main.riscv-support-non-power-of-2-types-when-expanding-memcmp
Conversation
Created using spr 1.3.6-beta.1
@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)

Changes

We can convert non-power-of-2 types into extended value types and then they will be widened.

Patch is 253.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114971.diff

4 Files Affected:
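For illustration, a minimal sketch of the idea (assumed SelectionDAG context, not code taken verbatim from this patch): a non-power-of-2 element count such as 15 has no simple MVT, but the EVT overload of getVectorVT can still describe it as an extended type, which type legalization then widens.

// Sketch only; DAG and the surrounding combine are assumed context.
LLVMContext &Ctx = *DAG.getContext();
unsigned VecSize = 15; // e.g. a 15-byte bcmp
// MVT::getVectorVT(MVT::i8, 15) has no simple type, but the EVT overload
// falls back to an extended vector type instead of failing.
EVT VecVT = EVT::getVectorVT(Ctx, MVT::i8, VecSize); // v15i8
EVT CmpVT = EVT::getVectorVT(Ctx, MVT::i1, VecSize); // v15i1
// Type legalization widens v15i8 to v16i8, so the combine no longer needs
// to bail out on non-power-of-2 sizes.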
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 72d85491a6f77d..e67515c24e8341 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14492,10 +14492,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
return SDValue();
unsigned OpSize = OpVT.getSizeInBits();
- // TODO: Support non-power-of-2 types.
- if (!isPowerOf2_32(OpSize))
- return SDValue();
-
// The size should be larger than XLen and smaller than the maximum vector
// size.
if (OpSize <= Subtarget.getXLen() ||
@@ -14517,8 +14513,8 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
return SDValue();
unsigned VecSize = OpSize / 8;
- EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
- EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 637b670cf041a5..c65feb9755633c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2507,20 +2507,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
else
Options.LoadSizes = {4, 2, 1};
if (IsZeroCmp && ST->hasVInstructions()) {
- unsigned RealMinVLen = ST->getRealMinVLen();
- // Support Fractional LMULs if the lengths are larger than XLen.
- // TODO: Support non-power-of-2 types.
- for (unsigned LMUL = 8; LMUL >= 2; LMUL /= 2) {
- unsigned Len = RealMinVLen / LMUL;
- if (Len > ST->getXLen())
- Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
- }
- for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors();
- LMUL *= 2) {
- unsigned Len = RealMinVLen * LMUL;
- if (Len > ST->getXLen())
- Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
- }
+ unsigned VLenB = ST->getRealMinVLen() / 8;
+ for (unsigned Size = ST->getXLen() / 8 + 1;
+ Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++)
+ Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
}
return Options;
}
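As a worked example of what the new loop produces (the parameter values here are assumed, not stated in the patch): on RV64 with VLEN=128 and a maximum LMUL of 8 for fixed-length vectors, XLen/8 is 8 bytes and VLenB is 16 bytes.

// Assumed values for illustration:
unsigned XLenBytes = 64 / 8;  // ST->getXLen() / 8 on RV64
unsigned VLenB = 128 / 8;     // ST->getRealMinVLen() / 8 with VLEN=128
unsigned MaxLMUL = 8;         // ST->getMaxLMULForFixedLengthVectors()
// Every size from 9 through 128 bytes is inserted at the front, keeping
// the list in descending order:
// Options.LoadSizes == {128, 127, ..., 10, 9, 8, 4, 2, 1}

Where the old code only added the power-of-2 sizes reachable by whole LMUL steps, the new loop covers every byte size the vector unit can handle in one compare.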
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index b39e6a425d702f..800b5a80fdf160 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -739,31 +739,14 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_5:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-ALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
@@ -797,17 +780,102 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
-; CHECK-UNALIGNED-LABEL: bcmp_size_5:
-; CHECK-UNALIGNED: # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT: lbu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT: lbu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-NEXT: snez a0, a0
-; CHECK-UNALIGNED-NEXT: ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: ret
entry:
%bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5)
ret i32 %bcmp
@@ -1020,37 +1088,14 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_6:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 5(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 5(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a0, a0, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a4
-; CHECK-ALIGNED-RV32-V-NEXT: slli a1, a1, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a5
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-ALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
@@ -1090,17 +1135,102 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
-; CHECK-UNALIGNED-LABEL: bcmp_size_6:
-; CHECK-UNALIGNED: # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT: lhu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT: lhu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-NEXT: snez a0, a0
-; CHECK-UNALIGNED-NEXT: ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: ret
entry:
%bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6)
ret i32 %bcmp
@@ -1337,41 +1467,14 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_7:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 5(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 6(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 5(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 6(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a5
-; CHECK-ALIGNED-RV32-V-NEXT: xor a3, a3, a4
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a3, a0
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-ALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
@@ -1415,17 +1518,102 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
-; CHECK-UNALIGNED-LABEL: bcmp_size_7:
-; CHECK-UNALIGNED: # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT: lw a0, 3(a0)
-; CHECK-UNALIGNED-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT: lw a1, 3(a1)
-; CHECK-UNALIGNED-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-NEXT: snez a0, a0
-; CHECK-UNALIGNED-NEXT: ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT: lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0
+; C...
[truncated]
; CHECK-UNALIGNED-RV32-V-NEXT: vmset.m v0
; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma
; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
I don't think anything in this PR is causing this, but why is the vcpop.m expanded with a mask?
I think this is the lowering of ISD::VECREDUCE_OR nodes. The VL parameter comes from getDefaultVLOps and its value is 16 because the vector type v15i8 has been widened to v16i8. We may use VP_REDUCE_OR here.
Nope, it doesn't work. We should fix it in another place.
This should be fixed by using VP nodes and #115162.
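For reference, a rough sketch of the VP-node direction mentioned above (operand order per ISD::VP_REDUCE_OR: start, vector, mask, EVL; WidenedCmp is a hypothetical v16i1 vmsne result, and this is not the final fix, which also depends on #115162):

// Sketch only: reduce just the 15 live lanes of the widened v16i1 value,
// so the final vcpop.m would not need the vmset.m/v0.t mask.
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 16);
SDValue Start = DAG.getConstant(0, DL, MVT::i1);   // identity value for OR
SDValue Mask = DAG.getAllOnesConstant(DL, MaskVT); // all lanes enabled
SDValue EVL = DAG.getConstant(15, DL, MVT::i32);   // EVL type assumed
SDValue Red = DAG.getNode(ISD::VP_REDUCE_OR, DL, MVT::i1,
                          {Start, WidenedCmp, Mask, EVL});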
We can convert non-power-of-2 types into extended value types and then they will be widened.