Skip to content

Commit 7d1952a

Browse files
authored
[LoongArch] Fix assertion failure when vec-args are not fully passed in vec-regs (llvm#159568)
Fixes llvm#159529
1 parent 8323ff0 commit 7d1952a

File tree

3 files changed

+190
-5
lines changed

3 files changed

+190
-5
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7332,16 +7332,23 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
73327332
unsigned StoreSizeBytes = GRLen / 8;
73337333
Align StackAlign = Align(GRLen / 8);
73347334

7335-
if (ValVT == MVT::f32 && !UseGPRForFloat)
7335+
if (ValVT == MVT::f32 && !UseGPRForFloat) {
73367336
Reg = State.AllocateReg(ArgFPR32s);
7337-
else if (ValVT == MVT::f64 && !UseGPRForFloat)
7337+
} else if (ValVT == MVT::f64 && !UseGPRForFloat) {
73387338
Reg = State.AllocateReg(ArgFPR64s);
7339-
else if (ValVT.is128BitVector())
7339+
} else if (ValVT.is128BitVector()) {
73407340
Reg = State.AllocateReg(ArgVRs);
7341-
else if (ValVT.is256BitVector())
7341+
UseGPRForFloat = false;
7342+
StoreSizeBytes = 16;
7343+
StackAlign = Align(16);
7344+
} else if (ValVT.is256BitVector()) {
73427345
Reg = State.AllocateReg(ArgXRs);
7343-
else
7346+
UseGPRForFloat = false;
7347+
StoreSizeBytes = 32;
7348+
StackAlign = Align(32);
7349+
} else {
73447350
Reg = State.AllocateReg(ArgGPRs);
7351+
}
73457352

73467353
unsigned StackOffset =
73477354
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
4+
5+
define <64 x i64> @test1(<64 x i64> %0) nounwind {
6+
; LA32-LABEL: test1:
7+
; LA32: # %bb.0: # %entry
8+
; LA32-NEXT: addi.w $sp, $sp, -32
9+
; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
10+
; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
11+
; LA32-NEXT: addi.w $fp, $sp, 32
12+
; LA32-NEXT: bstrins.w $sp, $zero, 4, 0
13+
; LA32-NEXT: xvld $xr8, $fp, 0
14+
; LA32-NEXT: xvld $xr9, $fp, 32
15+
; LA32-NEXT: xvld $xr10, $fp, 64
16+
; LA32-NEXT: xvld $xr11, $fp, 96
17+
; LA32-NEXT: xvld $xr12, $fp, 224
18+
; LA32-NEXT: xvld $xr13, $fp, 192
19+
; LA32-NEXT: xvld $xr14, $fp, 160
20+
; LA32-NEXT: xvld $xr15, $fp, 128
21+
; LA32-NEXT: xvst $xr12, $a0, 480
22+
; LA32-NEXT: xvst $xr13, $a0, 448
23+
; LA32-NEXT: xvst $xr14, $a0, 416
24+
; LA32-NEXT: xvst $xr15, $a0, 384
25+
; LA32-NEXT: xvst $xr11, $a0, 352
26+
; LA32-NEXT: xvst $xr10, $a0, 320
27+
; LA32-NEXT: xvst $xr9, $a0, 288
28+
; LA32-NEXT: xvst $xr8, $a0, 256
29+
; LA32-NEXT: xvst $xr7, $a0, 224
30+
; LA32-NEXT: xvst $xr6, $a0, 192
31+
; LA32-NEXT: xvst $xr5, $a0, 160
32+
; LA32-NEXT: xvst $xr4, $a0, 128
33+
; LA32-NEXT: xvst $xr3, $a0, 96
34+
; LA32-NEXT: xvst $xr2, $a0, 64
35+
; LA32-NEXT: xvst $xr1, $a0, 32
36+
; LA32-NEXT: xvst $xr0, $a0, 0
37+
; LA32-NEXT: addi.w $sp, $fp, -32
38+
; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
39+
; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
40+
; LA32-NEXT: addi.w $sp, $sp, 32
41+
; LA32-NEXT: ret
42+
;
43+
; LA64-LABEL: test1:
44+
; LA64: # %bb.0: # %entry
45+
; LA64-NEXT: addi.d $sp, $sp, -32
46+
; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
47+
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
48+
; LA64-NEXT: addi.d $fp, $sp, 32
49+
; LA64-NEXT: bstrins.d $sp, $zero, 4, 0
50+
; LA64-NEXT: xvld $xr8, $fp, 0
51+
; LA64-NEXT: xvld $xr9, $fp, 32
52+
; LA64-NEXT: xvld $xr10, $fp, 64
53+
; LA64-NEXT: xvld $xr11, $fp, 96
54+
; LA64-NEXT: xvld $xr12, $fp, 224
55+
; LA64-NEXT: xvld $xr13, $fp, 192
56+
; LA64-NEXT: xvld $xr14, $fp, 160
57+
; LA64-NEXT: xvld $xr15, $fp, 128
58+
; LA64-NEXT: xvst $xr12, $a0, 480
59+
; LA64-NEXT: xvst $xr13, $a0, 448
60+
; LA64-NEXT: xvst $xr14, $a0, 416
61+
; LA64-NEXT: xvst $xr15, $a0, 384
62+
; LA64-NEXT: xvst $xr11, $a0, 352
63+
; LA64-NEXT: xvst $xr10, $a0, 320
64+
; LA64-NEXT: xvst $xr9, $a0, 288
65+
; LA64-NEXT: xvst $xr8, $a0, 256
66+
; LA64-NEXT: xvst $xr7, $a0, 224
67+
; LA64-NEXT: xvst $xr6, $a0, 192
68+
; LA64-NEXT: xvst $xr5, $a0, 160
69+
; LA64-NEXT: xvst $xr4, $a0, 128
70+
; LA64-NEXT: xvst $xr3, $a0, 96
71+
; LA64-NEXT: xvst $xr2, $a0, 64
72+
; LA64-NEXT: xvst $xr1, $a0, 32
73+
; LA64-NEXT: xvst $xr0, $a0, 0
74+
; LA64-NEXT: addi.d $sp, $fp, -32
75+
; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
76+
; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
77+
; LA64-NEXT: addi.d $sp, $sp, 32
78+
; LA64-NEXT: ret
79+
entry:
80+
ret <64 x i64> %0
81+
}
82+
83+
define <32 x double> @test2(<32 x double> %0, <32 x double> %1) nounwind {
84+
; LA32-LABEL: test2:
85+
; LA32: # %bb.0: # %entry
86+
; LA32-NEXT: addi.w $sp, $sp, -32
87+
; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
88+
; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
89+
; LA32-NEXT: addi.w $fp, $sp, 32
90+
; LA32-NEXT: bstrins.w $sp, $zero, 4, 0
91+
; LA32-NEXT: xvst $xr7, $a0, 224
92+
; LA32-NEXT: xvst $xr6, $a0, 192
93+
; LA32-NEXT: xvst $xr5, $a0, 160
94+
; LA32-NEXT: xvst $xr4, $a0, 128
95+
; LA32-NEXT: xvst $xr3, $a0, 96
96+
; LA32-NEXT: xvst $xr2, $a0, 64
97+
; LA32-NEXT: xvst $xr1, $a0, 32
98+
; LA32-NEXT: xvst $xr0, $a0, 0
99+
; LA32-NEXT: addi.w $sp, $fp, -32
100+
; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
101+
; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
102+
; LA32-NEXT: addi.w $sp, $sp, 32
103+
; LA32-NEXT: ret
104+
;
105+
; LA64-LABEL: test2:
106+
; LA64: # %bb.0: # %entry
107+
; LA64-NEXT: addi.d $sp, $sp, -32
108+
; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
109+
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
110+
; LA64-NEXT: addi.d $fp, $sp, 32
111+
; LA64-NEXT: bstrins.d $sp, $zero, 4, 0
112+
; LA64-NEXT: xvst $xr7, $a0, 224
113+
; LA64-NEXT: xvst $xr6, $a0, 192
114+
; LA64-NEXT: xvst $xr5, $a0, 160
115+
; LA64-NEXT: xvst $xr4, $a0, 128
116+
; LA64-NEXT: xvst $xr3, $a0, 96
117+
; LA64-NEXT: xvst $xr2, $a0, 64
118+
; LA64-NEXT: xvst $xr1, $a0, 32
119+
; LA64-NEXT: xvst $xr0, $a0, 0
120+
; LA64-NEXT: addi.d $sp, $fp, -32
121+
; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
122+
; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
123+
; LA64-NEXT: addi.d $sp, $sp, 32
124+
; LA64-NEXT: ret
125+
entry:
126+
ret <32 x double> %0
127+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
4+
5+
define <64 x i32> @test1(<64 x i32> %0) nounwind {
6+
; CHECK-LABEL: test1:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: vld $vr8, $sp, 0
9+
; CHECK-NEXT: vld $vr9, $sp, 16
10+
; CHECK-NEXT: vld $vr10, $sp, 32
11+
; CHECK-NEXT: vld $vr11, $sp, 48
12+
; CHECK-NEXT: vld $vr12, $sp, 112
13+
; CHECK-NEXT: vld $vr13, $sp, 96
14+
; CHECK-NEXT: vld $vr14, $sp, 80
15+
; CHECK-NEXT: vld $vr15, $sp, 64
16+
; CHECK-NEXT: vst $vr12, $a0, 240
17+
; CHECK-NEXT: vst $vr13, $a0, 224
18+
; CHECK-NEXT: vst $vr14, $a0, 208
19+
; CHECK-NEXT: vst $vr15, $a0, 192
20+
; CHECK-NEXT: vst $vr11, $a0, 176
21+
; CHECK-NEXT: vst $vr10, $a0, 160
22+
; CHECK-NEXT: vst $vr9, $a0, 144
23+
; CHECK-NEXT: vst $vr8, $a0, 128
24+
; CHECK-NEXT: vst $vr7, $a0, 112
25+
; CHECK-NEXT: vst $vr6, $a0, 96
26+
; CHECK-NEXT: vst $vr5, $a0, 80
27+
; CHECK-NEXT: vst $vr4, $a0, 64
28+
; CHECK-NEXT: vst $vr3, $a0, 48
29+
; CHECK-NEXT: vst $vr2, $a0, 32
30+
; CHECK-NEXT: vst $vr1, $a0, 16
31+
; CHECK-NEXT: vst $vr0, $a0, 0
32+
; CHECK-NEXT: ret
33+
entry:
34+
ret <64 x i32> %0
35+
}
36+
37+
define <16 x double> @test2(<16 x double> %0, <16 x double> %1) nounwind {
38+
; CHECK-LABEL: test2:
39+
; CHECK: # %bb.0: # %entry
40+
; CHECK-NEXT: vst $vr7, $a0, 112
41+
; CHECK-NEXT: vst $vr6, $a0, 96
42+
; CHECK-NEXT: vst $vr5, $a0, 80
43+
; CHECK-NEXT: vst $vr4, $a0, 64
44+
; CHECK-NEXT: vst $vr3, $a0, 48
45+
; CHECK-NEXT: vst $vr2, $a0, 32
46+
; CHECK-NEXT: vst $vr1, $a0, 16
47+
; CHECK-NEXT: vst $vr0, $a0, 0
48+
; CHECK-NEXT: ret
49+
entry:
50+
ret <16 x double> %0
51+
}

0 commit comments

Comments (0)