@@ -67,61 +67,18 @@ exit:
67
67
define void @test_replicating_store_x86_fp80_cost (i32 %n , ptr %dst ) #0 {
68
68
; COST-LABEL: define void @test_replicating_store_x86_fp80_cost(
69
69
; COST-SAME: i32 [[N:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
70
- ; COST-NEXT: [[ENTRY:.*:]]
71
- ; COST-NEXT: [[TMP0:%.*]] = add i32 [[N]], 2
72
- ; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
73
- ; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
74
- ; COST: [[VECTOR_SCEVCHECK]]:
75
- ; COST-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
76
- ; COST-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
77
- ; COST-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], 4294967295
78
- ; COST-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
79
- ; COST: [[VECTOR_PH]]:
80
- ; COST-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8
81
- ; COST-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
82
- ; COST-NEXT: br label %[[VECTOR_BODY:.*]]
83
- ; COST: [[VECTOR_BODY]]:
84
- ; COST-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
85
- ; COST-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
86
- ; COST-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
87
- ; COST-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
88
- ; COST-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
89
- ; COST-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[VEC_IND]] to <2 x i64>
90
- ; COST-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[STEP_ADD]] to <2 x i64>
91
- ; COST-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[STEP_ADD_2]] to <2 x i64>
92
- ; COST-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[STEP_ADD_3]] to <2 x i64>
93
- ; COST-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
94
- ; COST-NEXT: [[TMP9:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP8]]
95
- ; COST-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
96
- ; COST-NEXT: [[TMP11:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP10]]
97
- ; COST-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
98
- ; COST-NEXT: [[TMP13:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP12]]
99
- ; COST-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
100
- ; COST-NEXT: [[TMP15:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP14]]
101
- ; COST-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
102
- ; COST-NEXT: [[TMP17:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP16]]
103
- ; COST-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
104
- ; COST-NEXT: [[TMP19:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP18]]
105
- ; COST-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
106
- ; COST-NEXT: [[TMP21:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP20]]
107
- ; COST-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
70
+ ; COST-NEXT: [[ENTRY:.*]]:
71
+ ; COST-NEXT: br label %[[LOOP:.*]]
72
+ ; COST: [[LOOP]]:
73
+ ; COST-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
74
+ ; COST-NEXT: [[TMP22:%.*]] = zext i32 [[IV]] to i64
108
75
; COST-NEXT: [[TMP23:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP22]]
109
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP9]], align 16
110
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP11]], align 16
111
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP13]], align 16
112
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP15]], align 16
113
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP17]], align 16
114
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP19]], align 16
115
- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP21]], align 16
116
76
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP23]], align 16
117
- ; COST-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
118
- ; COST-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD_3]], splat (i32 2)
119
- ; COST-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
120
- ; COST-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
121
- ; COST: [[MIDDLE_BLOCK]]:
122
- ; COST-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
123
- ; COST-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
124
- ; COST: [[SCALAR_PH]]:
77
+ ; COST-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
78
+ ; COST-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], [[N]]
79
+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
80
+ ; COST: [[EXIT]]:
81
+ ; COST-NEXT: ret void
125
82
;
126
83
; FORCED-LABEL: define void @test_replicating_store_x86_fp80_cost(
127
84
; FORCED-SAME: i32 [[N:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
0 commit comments