Skip to content

[SLP]Initial support for (masked)loads + compress and (masked)interleaved #132099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
338 changes: 318 additions & 20 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,16 @@ define void @test() {
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc ninf nsz arcp contract afn float [[GEPLOAD1612]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc ninf nsz arcp contract afn <16 x float> [[TMP4]], [[TMP0]]
; CHECK-NEXT: store <16 x float> [[TMP6]], ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 2928), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1272), align 16
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1288), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1296), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load <8 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1304), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1272), align 16
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1620), align 4
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP8]], <16 x i32> <i32 poison, i32 0, i32 2, i32 1, i32 0, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP7]], <16 x i32> <i32 poison, i32 0, i32 20, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP12]], <16 x i32> <i32 1, i32 1, i32 17, i32 17, i32 18, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 19, i32 19, i32 19, i32 19>
; CHECK-NEXT: [[TMP15:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP12]], <8 x float> [[TMP13]], i64 8)
; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP15]], <4 x float> [[TMP7]], i64 0)
; CHECK-NEXT: [[TMP17:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP16]], <2 x float> [[TMP9]], i64 6)
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP10]], <16 x i32> <i32 1, i32 1, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 19, i32 19, i32 19, i32 19, i32 18>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18>
; CHECK-NEXT: [[TMP18:%.*]] = fmul reassoc ninf nsz arcp contract afn <16 x float> [[TMP14]], [[TMP17]]
; CHECK-NEXT: store <16 x float> [[TMP18]], ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 2992), align 16
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x float> [[TMP18]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 15, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
; CHECK-NEXT: store <16 x float> [[TMP15]], ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 2992), align 16
; CHECK-NEXT: ret void
;
alloca_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,9 @@ define void @test() {
; CHECK-NEXT: [[COND_IN_V:%.*]] = select i1 false, ptr null, ptr null
; CHECK-NEXT: br label [[BB:%.*]]
; CHECK: bb:
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[COND_IN_V]], align 8
; CHECK-NEXT: [[BV:%.*]] = icmp eq i64 [[V]], 0
; CHECK-NEXT: [[IN_1:%.*]] = getelementptr i64, ptr [[COND_IN_V]], i64 4
; CHECK-NEXT: [[V_1:%.*]] = load i64, ptr [[IN_1]], align 8
; CHECK-NEXT: [[BV_1:%.*]] = icmp eq i64 [[V_1]], 0
; CHECK-NEXT: [[IN_2:%.*]] = getelementptr i64, ptr [[COND_IN_V]], i64 8
; CHECK-NEXT: [[V_2:%.*]] = load i64, ptr [[IN_2]], align 8
; CHECK-NEXT: [[BV_2:%.*]] = icmp eq i64 [[V_2]], 0
; CHECK-NEXT: [[IN_3:%.*]] = getelementptr i64, ptr [[COND_IN_V]], i64 12
; CHECK-NEXT: [[V_3:%.*]] = load i64, ptr [[IN_3]], align 8
; CHECK-NEXT: [[BV_3:%.*]] = icmp eq i64 [[V_3]], 0
; CHECK-NEXT: [[TMP0:%.*]] = call <13 x i64> @llvm.masked.load.v13i64.p0(ptr [[COND_IN_V]], i32 8, <13 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <13 x i64> poison)
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <13 x i64> [[TMP0]], <13 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: ret void
;
; CHECK-SLP-THRESHOLD-LABEL: define void @test
Expand All @@ -28,11 +20,9 @@ define void @test() {
; CHECK-SLP-THRESHOLD-NEXT: [[COND_IN_V:%.*]] = select i1 false, ptr null, ptr null
; CHECK-SLP-THRESHOLD-NEXT: br label [[BB:%.*]]
; CHECK-SLP-THRESHOLD: bb:
; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0
; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 12, i64 8, i64 4, i64 0>
; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> splat (i1 true), <4 x i64> poison)
; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer
; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = call <13 x i64> @llvm.masked.load.v13i64.p0(ptr [[COND_IN_V]], i32 8, <13 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <13 x i64> poison)
; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <13 x i64> [[TMP0]], <13 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
; CHECK-SLP-THRESHOLD-NEXT: ret void
;
entry:
Expand Down
16 changes: 6 additions & 10 deletions llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,16 @@ define void @foo() {
; SSE-NEXT: ret void
;
; AVX-LABEL: @foo(
; AVX-NEXT: [[TMP1:%.*]] = load i32, ptr @b, align 16
; AVX-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @b, i64 8), align 8
; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
; AVX-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; AVX-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr @b, align 16
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; AVX-NEXT: store <8 x i32> [[TMP5]], ptr @a, align 16
; AVX-NEXT: ret void
;
; AVX512-LABEL: @foo(
; AVX512-NEXT: [[TMP1:%.*]] = load i32, ptr @b, align 16
; AVX512-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @b, i64 8), align 8
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
; AVX512-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; AVX512-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr @b, align 16
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; AVX512-NEXT: store <8 x i32> [[TMP5]], ptr @a, align 16
; AVX512-NEXT: ret void
;
Expand Down
Loading