Skip to content

Commit ab76686

Browse files
authored
[X86] Baseline test for "invalid operand order for fp16 vector comparison" issue (llvm#159786)
Despite the difference in the order of fcmp operands, `%lhs, %rhs` and `%rhs, %lhs`, the generated assembly remains the same. This is a baseline test for llvm#159723
1 parent 6884cc7 commit ab76686

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

llvm/test/CodeGen/X86/pr159723.ll

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl| FileCheck %s
3+
4+
declare <8 x half> @test_call_8()
5+
6+
declare <16 x half> @test_call_16()
7+
8+
declare <32 x half> @test_call_32()
9+
10+
define <8 x i1> @test_cmp_v8half_ogt(<8 x half> %rhs, <8 x i1> %mask) nounwind {
11+
; CHECK-LABEL: test_cmp_v8half_ogt:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: subq $40, %rsp
14+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
15+
; CHECK-NEXT: vpsllw $15, %xmm1, %xmm0
16+
; CHECK-NEXT: vpmovw2m %xmm0, %k1
17+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
18+
; CHECK-NEXT: callq test_call_8@PLT
19+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
20+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 {%k1} # 16-byte Folded Reload
21+
; CHECK-NEXT: vpmovm2w %k0, %xmm0
22+
; CHECK-NEXT: addq $40, %rsp
23+
; CHECK-NEXT: retq
24+
%lhs = call <8 x half> @test_call_8()
25+
%comp = fcmp ogt <8 x half> %lhs, %rhs
26+
%res = and <8 x i1> %comp, %mask
27+
ret <8 x i1> %res
28+
}
29+
30+
define <8 x i1> @test_cmp_v8half_ogt_commute(<8 x half> %rhs, <8 x i1> %mask) nounwind {
31+
; CHECK-LABEL: test_cmp_v8half_ogt_commute:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: subq $40, %rsp
34+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
35+
; CHECK-NEXT: vpsllw $15, %xmm1, %xmm0
36+
; CHECK-NEXT: vpmovw2m %xmm0, %k1
37+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
38+
; CHECK-NEXT: callq test_call_8@PLT
39+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
40+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 {%k1} # 16-byte Folded Reload
41+
; CHECK-NEXT: vpmovm2w %k0, %xmm0
42+
; CHECK-NEXT: addq $40, %rsp
43+
; CHECK-NEXT: retq
44+
%lhs = call <8 x half> @test_call_8()
45+
%comp = fcmp ogt <8 x half> %rhs, %lhs
46+
%res = and <8 x i1> %comp, %mask
47+
ret <8 x i1> %res
48+
}
49+
50+
51+
define <16 x i1> @test_cmp_v16half_olt(<16 x half> %rhs, <16 x i1> %mask) nounwind {
52+
; CHECK-LABEL: test_cmp_v16half_olt:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: subq $56, %rsp
55+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
56+
; CHECK-NEXT: vpsllw $7, %xmm1, %xmm0
57+
; CHECK-NEXT: vpmovb2m %xmm0, %k1
58+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
59+
; CHECK-NEXT: callq test_call_16@PLT
60+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
61+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 {%k1} # 32-byte Folded Reload
62+
; CHECK-NEXT: vpmovm2b %k0, %xmm0
63+
; CHECK-NEXT: addq $56, %rsp
64+
; CHECK-NEXT: vzeroupper
65+
; CHECK-NEXT: retq
66+
%lhs = call <16 x half> @test_call_16()
67+
%comp = fcmp olt <16 x half> %lhs, %rhs
68+
%res = and <16 x i1> %comp, %mask
69+
ret <16 x i1> %res
70+
}
71+
72+
define <16 x i1> @test_cmp_v16half_olt_commute(<16 x half> %rhs, <16 x i1> %mask) nounwind {
73+
; CHECK-LABEL: test_cmp_v16half_olt_commute:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: subq $56, %rsp
76+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
77+
; CHECK-NEXT: vpsllw $7, %xmm1, %xmm0
78+
; CHECK-NEXT: vpmovb2m %xmm0, %k1
79+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
80+
; CHECK-NEXT: callq test_call_16@PLT
81+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
82+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 {%k1} # 32-byte Folded Reload
83+
; CHECK-NEXT: vpmovm2b %k0, %xmm0
84+
; CHECK-NEXT: addq $56, %rsp
85+
; CHECK-NEXT: vzeroupper
86+
; CHECK-NEXT: retq
87+
%lhs = call <16 x half> @test_call_16()
88+
%comp = fcmp olt <16 x half> %rhs, %lhs
89+
%res = and <16 x i1> %comp, %mask
90+
ret <16 x i1> %res
91+
}
92+
93+
define <32 x i1> @test_cmp_v32half_oge(<32 x half> %rhs, <32 x i1> %mask) nounwind {
94+
; CHECK-LABEL: test_cmp_v32half_oge:
95+
; CHECK: # %bb.0:
96+
; CHECK-NEXT: subq $88, %rsp
97+
; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
98+
; CHECK-NEXT: vpsllw $7, %ymm1, %ymm0
99+
; CHECK-NEXT: vpmovb2m %ymm0, %k1
100+
; CHECK-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
101+
; CHECK-NEXT: callq test_call_32@PLT
102+
; CHECK-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 4-byte Reload
103+
; CHECK-NEXT: vcmpleph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 {%k1} # 64-byte Folded Reload
104+
; CHECK-NEXT: vpmovm2b %k0, %ymm0
105+
; CHECK-NEXT: addq $88, %rsp
106+
; CHECK-NEXT: retq
107+
%lhs = call <32 x half> @test_call_32()
108+
%comp = fcmp oge <32 x half> %lhs, %rhs
109+
%res = and <32 x i1> %comp, %mask
110+
ret <32 x i1> %res
111+
}
112+
113+
define <32 x i1> @test_cmp_v32half_oge_commute(<32 x half> %rhs, <32 x i1> %mask) nounwind {
114+
; CHECK-LABEL: test_cmp_v32half_oge_commute:
115+
; CHECK: # %bb.0:
116+
; CHECK-NEXT: subq $88, %rsp
117+
; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
118+
; CHECK-NEXT: vpsllw $7, %ymm1, %ymm0
119+
; CHECK-NEXT: vpmovb2m %ymm0, %k1
120+
; CHECK-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
121+
; CHECK-NEXT: callq test_call_32@PLT
122+
; CHECK-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 4-byte Reload
123+
; CHECK-NEXT: vcmpleph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 {%k1} # 64-byte Folded Reload
124+
; CHECK-NEXT: vpmovm2b %k0, %ymm0
125+
; CHECK-NEXT: addq $88, %rsp
126+
; CHECK-NEXT: retq
127+
%lhs = call <32 x half> @test_call_32()
128+
%comp = fcmp oge <32 x half> %rhs, %lhs
129+
%res = and <32 x i1> %comp, %mask
130+
ret <32 x i1> %res
131+
}

0 commit comments

Comments
 (0)