-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] fix SP recovery in varargs functions #114316
Conversation
@llvm/pr-subscribers-backend-risc-v Author: None (dlav-sc) ChangesThis patch fixes sp recovery in the epilogue in varargs functions when fp register is presented and second sp adjustment is applied. Full diff: https://github.com/llvm/llvm-project/pull/114316.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f5851f37154519..2ff78ce9ea9c65 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -809,9 +809,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
: getStackSizeWithRVVPadding(MF) -
RVFI->getReservedSpillsSize();
- uint64_t FPOffset = FirstSPAdjustAmount ? FirstSPAdjustAmount
- : getStackSizeWithRVVPadding(MF) -
- RVFI->getVarArgsSaveSize();
+ uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
bool RestoreFP = RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
diff --git a/llvm/test/CodeGen/RISCV/test.ll b/llvm/test/CodeGen/RISCV/test.ll
new file mode 100644
index 00000000000000..a996a8b0d70265
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/test.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -riscv-v-vector-bits-max=512 -mtriple=riscv64 -mattr=+m,+c,+v -O0 < %s | FileCheck --check-prefix=RV64V %s
+
+%struct.Foo = type { ptr, ptr }
+
+@x = dso_local global ptr null, align 8
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+; Function Attrs: mustprogress noinline optnone uwtable vscale_range(8,8)
+define dso_local void @_Z4vfooPcPKcPv(ptr noundef %0, ptr noundef %1, ptr noundef %2) #0 {
+; RV64V-LABEL: _Z4vfooPcPKcPv:
+; RV64V: # %bb.0:
+; RV64V-NEXT: addi sp, sp, -496
+; RV64V-NEXT: .cfi_def_cfa_offset 496
+; RV64V-NEXT: sd ra, 488(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 480(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 496
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: lui a3, 2
+; RV64V-NEXT: addiw a3, a3, -592
+; RV64V-NEXT: sub sp, sp, a3
+; RV64V-NEXT: lui a3, 2
+; RV64V-NEXT: addiw a3, a3, -120
+; RV64V-NEXT: sub a3, s0, a3
+; RV64V-NEXT: lui a4, 2
+; RV64V-NEXT: sub a4, s0, a4
+; RV64V-NEXT: sd a3, 104(a4) # 8-byte Folded Spill
+; RV64V-NEXT: mv a3, a2
+; RV64V-NEXT: lui a2, 2
+; RV64V-NEXT: sub a2, s0, a2
+; RV64V-NEXT: ld a2, 104(a2) # 8-byte Folded Reload
+; RV64V-NEXT: lui a4, 2
+; RV64V-NEXT: sub a4, s0, a4
+; RV64V-NEXT: sd a3, 112(a4) # 8-byte Folded Spill
+; RV64V-NEXT: mv a3, a0
+; RV64V-NEXT: lui a0, 2
+; RV64V-NEXT: sub a0, s0, a0
+; RV64V-NEXT: ld a0, 112(a0) # 8-byte Folded Reload
+; RV64V-NEXT: sd a3, -32(s0)
+; RV64V-NEXT: sd a1, -40(s0)
+; RV64V-NEXT: sd a0, -48(s0)
+; RV64V-NEXT: ld a0, -32(s0)
+; RV64V-NEXT: sd a0, 8(a2)
+; RV64V-NEXT: lui a0, 2
+; RV64V-NEXT: addiw a0, a0, -144
+; RV64V-NEXT: sub a0, s0, a0
+; RV64V-NEXT: sd a0, 16(a2)
+; RV64V-NEXT: lui a0, %hi(x)
+; RV64V-NEXT: lui a1, %hi(_ZL6x_implP3Foo)
+; RV64V-NEXT: addi a1, a1, %lo(_ZL6x_implP3Foo)
+; RV64V-NEXT: sd a1, %lo(x)(a0)
+; RV64V-NEXT: ld a1, -48(s0)
+; RV64V-NEXT: sd a1, 0(a2)
+; RV64V-NEXT: ld a1, %lo(x)(a0)
+; RV64V-NEXT: lui a0, 2
+; RV64V-NEXT: addiw a0, a0, -128
+; RV64V-NEXT: sub a0, s0, a0
+; RV64V-NEXT: jalr a1
+; RV64V-NEXT: addi sp, s0, -496
+; RV64V-NEXT: ld ra, 488(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 480(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 496
+; RV64V-NEXT: ret
+ %4 = alloca ptr, align 8
+ %5 = alloca ptr, align 8
+ %6 = alloca ptr, align 8
+ %7 = alloca [8000 x i8], align 1
+ %8 = alloca %struct.Foo, align 8
+ %9 = alloca ptr, align 8
+ store ptr %0, ptr %4, align 8
+ store ptr %1, ptr %5, align 8
+ store ptr %2, ptr %6, align 8
+ %10 = getelementptr inbounds nuw %struct.Foo, ptr %8, i32 0, i32 0
+ %11 = load ptr, ptr %4, align 8
+ store ptr %11, ptr %10, align 8
+ %12 = getelementptr inbounds nuw %struct.Foo, ptr %8, i32 0, i32 1
+ %13 = getelementptr inbounds [8000 x i8], ptr %7, i64 0, i64 0
+ store ptr %13, ptr %12, align 8
+ store ptr @_ZL6x_implP3Foo, ptr @x, align 8
+ call void @llvm.va_copy.p0(ptr %9, ptr %6)
+ %14 = load ptr, ptr @x, align 8
+ call void %14(ptr noundef %8)
+ call void @llvm.va_end.p0(ptr %9)
+ ret void
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable vscale_range(8,8)
+define internal void @_ZL6x_implP3Foo(ptr noundef %0) #1 {
+; RV64V-LABEL: _ZL6x_implP3Foo:
+; RV64V: # %bb.0:
+; RV64V-NEXT: addi sp, sp, -32
+; RV64V-NEXT: .cfi_def_cfa_offset 32
+; RV64V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 32
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: sd a0, -24(s0)
+; RV64V-NEXT: addi sp, s0, -32
+; RV64V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 32
+; RV64V-NEXT: ret
+ %2 = alloca ptr, align 8
+ store ptr %0, ptr %2, align 8
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare void @llvm.va_copy.p0(ptr, ptr) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare void @llvm.va_end.p0(ptr) #2
+
+; Function Attrs: mustprogress noinline optnone uwtable vscale_range(8,8)
+define dso_local void @_Z3fooPKcz(ptr noundef %0, ...) #0 {
+; RV64V-LABEL: _Z3fooPKcz:
+; RV64V: # %bb.0:
+; RV64V-NEXT: addi sp, sp, -496
+; RV64V-NEXT: .cfi_def_cfa_offset 496
+; RV64V-NEXT: sd ra, 424(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 416(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -72
+; RV64V-NEXT: .cfi_offset s0, -80
+; RV64V-NEXT: addi s0, sp, 432
+; RV64V-NEXT: .cfi_def_cfa s0, 64
+; RV64V-NEXT: lui t0, 2
+; RV64V-NEXT: addiw t0, t0, -576
+; RV64V-NEXT: sub sp, sp, t0
+; RV64V-NEXT: sd a7, 56(s0)
+; RV64V-NEXT: sd a6, 48(s0)
+; RV64V-NEXT: sd a5, 40(s0)
+; RV64V-NEXT: sd a4, 32(s0)
+; RV64V-NEXT: sd a3, 24(s0)
+; RV64V-NEXT: sd a2, 16(s0)
+; RV64V-NEXT: sd a1, 8(s0)
+; RV64V-NEXT: sd a0, -32(s0)
+; RV64V-NEXT: addi a0, s0, 8
+; RV64V-NEXT: sd a0, -40(s0)
+; RV64V-NEXT: ld a1, -32(s0)
+; RV64V-NEXT: ld a2, -40(s0)
+; RV64V-NEXT: lui a0, 2
+; RV64V-NEXT: addiw a0, a0, -152
+; RV64V-NEXT: sub a0, s0, a0
+; RV64V-NEXT: call _Z4vfooPcPKcPv
+; RV64V-NEXT: addi sp, s0, -432
+; RV64V-NEXT: ld ra, 424(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 416(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 496
+; RV64V-NEXT: ret
+ %2 = alloca ptr, align 8
+ %3 = alloca ptr, align 8
+ %4 = alloca [8000 x i8], align 1
+ store ptr %0, ptr %2, align 8
+ call void @llvm.va_start.p0(ptr %3)
+ %5 = getelementptr inbounds [8000 x i8], ptr %4, i64 0, i64 0
+ %6 = load ptr, ptr %2, align 8
+ %7 = load ptr, ptr %3, align 8
+ call void @_Z4vfooPcPKcPv(ptr noundef %5, ptr noundef %6, ptr noundef %7)
+ call void @llvm.va_end.p0(ptr %3)
+ ret void
+}
+
+attributes #0 = { mustprogress noinline optnone uwtable vscale_range(8,8) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-b,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zacas,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
+attributes #1 = { mustprogress noinline nounwind optnone uwtable vscale_range(8,8) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-b,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zacas,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
+attributes #2 = { nocallback nofree nosync nounwind willreturn }
|
@eaeltsin FYI
Yeah, you are absolutely right. Sorry for causing problems. |
7389adf
to
c42aa82
Compare
d20f5a2
to
e2a1523
Compare
Do you plan to precommit the test to show the diff for making the fix? (Your commits here add the test after the fix) |
e2a1523
to
e48208c
Compare
Yep, take a look, please |
Thank you for the fix! |
9fab2f9
to
2b63ab4
Compare
Can you please merge this soon? Thanks! |
@eaeltsin there are no reviews yet, but you could provide one :) I haven't (yet) because I am not super familiar with the code affected any more, but the commit and the change in the test look good enough to land, from my perspective. |
Commit 1 should be a real commit to the tree, i.e. not squashed as part of merging this PR. Commits 2 and 3 should be one commit, i.e. squash and merge is right here. Though it's a bit odd you chose to split them out into separate commits in the PR in the first place. |
I've made a separate PR with test: #114970 |
It's LGTM and I will give formal LGTM once #114970 land and this PR rebased |
For future reference, this is something you can directly commit to |
2b63ab4
to
39f58fb
Compare
39f58fb
to
5d2c4df
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Thanks for taking a look at the patch! |
This patch fixes sp recovery in the epilogue in varargs functions when fp register is presented and second sp adjustment is applied.
Source of the issue: #110809