Skip to content

Commit 81c0c73

Browse files
committed
[LV] Pass operand info to getMemoryOpCost in getMemInstScalarizationCost.
Pass operand info (the TTI::OperandValueInfo obtained via TTI::getOperandInfo for the instruction's first operand) to getMemoryOpCost in getMemInstScalarizationCost. This matches the behavior in VPReplicateRecipe::computeCost.
1 parent 93f9ca2 commit 81c0c73

File tree

2 files changed

14 additions & 56 deletions

2 files changed

14 additions & 56 deletions

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5152,9 +5152,10 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
5152 5152
// Don't pass *I here, since it is scalar but will actually be part of a
5153 5153
// vectorized loop where the user of it is a vectorized instruction.
5154 5154
const Align Alignment = getLoadStoreAlignment(I);
5155-
Cost += VF.getFixedValue() * TTI.getMemoryOpCost(I->getOpcode(),
5156-
ValTy->getScalarType(),
5157-
Alignment, AS, CostKind);
5155+
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
5156+
Cost += VF.getFixedValue() *
5157+
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
5158+
AS, CostKind, OpInfo);
5158 5159

5159 5160
// Get the overhead of the extractelement and insertelement instructions
5160 5161
// we might create due to scalarization.

llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll

Lines changed: 10 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -67,61 +67,18 @@ exit:
67 67
define void @test_replicating_store_x86_fp80_cost(i32 %n, ptr %dst) #0 {
68 68
; COST-LABEL: define void @test_replicating_store_x86_fp80_cost(
69 69
; COST-SAME: i32 [[N:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
70-
; COST-NEXT: [[ENTRY:.*:]]
71-
; COST-NEXT: [[TMP0:%.*]] = add i32 [[N]], 2
72-
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
73-
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
74-
; COST: [[VECTOR_SCEVCHECK]]:
75-
; COST-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
76-
; COST-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
77-
; COST-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], 4294967295
78-
; COST-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
79-
; COST: [[VECTOR_PH]]:
80-
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8
81-
; COST-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
82-
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
83-
; COST: [[VECTOR_BODY]]:
84-
; COST-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
85-
; COST-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
86-
; COST-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
87-
; COST-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
88-
; COST-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
89-
; COST-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[VEC_IND]] to <2 x i64>
90-
; COST-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[STEP_ADD]] to <2 x i64>
91-
; COST-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[STEP_ADD_2]] to <2 x i64>
92-
; COST-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[STEP_ADD_3]] to <2 x i64>
93-
; COST-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
94-
; COST-NEXT: [[TMP9:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP8]]
95-
; COST-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
96-
; COST-NEXT: [[TMP11:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP10]]
97-
; COST-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
98-
; COST-NEXT: [[TMP13:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP12]]
99-
; COST-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
100-
; COST-NEXT: [[TMP15:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP14]]
101-
; COST-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
102-
; COST-NEXT: [[TMP17:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP16]]
103-
; COST-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
104-
; COST-NEXT: [[TMP19:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP18]]
105-
; COST-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
106-
; COST-NEXT: [[TMP21:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP20]]
107-
; COST-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
70+
; COST-NEXT: [[ENTRY:.*]]:
71+
; COST-NEXT: br label %[[LOOP:.*]]
72+
; COST: [[LOOP]]:
73+
; COST-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
74+
; COST-NEXT: [[TMP22:%.*]] = zext i32 [[IV]] to i64
108 75
; COST-NEXT: [[TMP23:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP22]]
109-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP9]], align 16
110-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP11]], align 16
111-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP13]], align 16
112-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP15]], align 16
113-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP17]], align 16
114-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP19]], align 16
115-
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP21]], align 16
116 76
; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP23]], align 16
117-
; COST-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
118-
; COST-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD_3]], splat (i32 2)
119-
; COST-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
120-
; COST-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
121-
; COST: [[MIDDLE_BLOCK]]:
122-
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
123-
; COST-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
124-
; COST: [[SCALAR_PH]]:
77+
; COST-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
78+
; COST-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], [[N]]
79+
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
80+
; COST: [[EXIT]]:
81+
; COST-NEXT: ret void
125 82
;
126 83
; FORCED-LABEL: define void @test_replicating_store_x86_fp80_cost(
127 84
; FORCED-SAME: i32 [[N:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {

0 commit comments

Comments
 (0)