Merge pull request #229 from RossBrunton/testfix
[vecz] Update tests for LLVM tip and fix alignment optimization
RossBrunton authored Nov 23, 2023
2 parents fc409bc + 2f11238 commit c119c2b
Showing 8 changed files with 82 additions and 68 deletions.
9 changes: 9 additions & 0 deletions modules/compiler/vecz/source/vecz_pass_builder.cpp
@@ -61,6 +61,7 @@
#include "analysis/vectorizable_function_analysis.h"
#include "analysis/vectorization_unit_analysis.h"
#include "debugging.h"
#include "multi_llvm/llvm_version.h"
#include "transform/common_gep_elimination_pass.h"
#include "transform/control_flow_conversion_pass.h"
#include "transform/inline_post_vectorization_pass.h"
@@ -71,6 +72,10 @@
#include "transform/scalarization_pass.h"
#include "transform/ternary_transform_pass.h"

#if LLVM_VERSION_GREATER_EQUAL(18, 0)
#include <llvm/Transforms/Scalar/InferAlignment.h>
#endif

#define DEBUG_TYPE "vecz"
using namespace llvm;
using namespace vecz;
@@ -252,6 +257,10 @@ bool vecz::buildPassPipeline(ModulePassManager &PM) {
PM.addPass(createModuleToFunctionPassAdaptor(
InterleavedGroupCombinePass(eInterleavedLoad)));
PM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
#if LLVM_VERSION_GREATER_EQUAL(18, 0)
// LLVM 18 split this pass out of InstCombine
PM.addPass(createModuleToFunctionPassAdaptor(InferAlignmentPass()));
#endif
PM.addPass(createModuleToFunctionPassAdaptor(DCEPass()));
PM.addPass(createModuleToFunctionPassAdaptor(SimplifyMaskedMemOpsPass()));
PM.addPass(DefineInternalBuiltinsPass());
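Prior to LLVM 18, InstCombine raised load and store alignments as a side effect; on LLVM 18 that work lives in the separate InferAlignmentPass added above. A minimal sketch of the kind of rewrite the pass performs, on hypothetical IR that is not taken from these tests:

; Input: the alloca guarantees 16-byte alignment, but the load only claims 4.
define i32 @infer_alignment_example() {
entry:
  %buf = alloca <4 x i32>, align 16
  %v = load i32, ptr %buf, align 4
  ret i32 %v
}
; After InferAlignment (or, before LLVM 18, after InstCombine) the load's
; alignment can be raised to match what is known about %buf:
;   %v = load i32, ptr %buf, align 16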
@@ -139,7 +139,7 @@ entry:
; CHECK-NEXT: store <32 x float> [[ADDEND:%.*]], ptr [[FIXLEN_ALLOC]], align 128
; CHECK-NEXT: [[IDX0:%.*]] = call <vscale x 128 x i32> @llvm.experimental.stepvector.nxv128i32()
; CHECK-NEXT: [[IDX1:%.*]] = and <vscale x 128 x i32> [[IDX0]], shufflevector (<vscale x 128 x i32> insertelement (<vscale x 128 x i32> {{(undef|poison)}}, i32 31, {{i32|i64}} 0), <vscale x 128 x i32> {{(undef|poison)}}, <vscale x 128 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP0:%.*]] = {{s|z}}ext <vscale x 128 x i32> [[IDX1]] to <vscale x 128 x i64>
; CHECK-NEXT: [[TMP0:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 128 x i32> [[IDX1]] to <vscale x 128 x i64>
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds float, ptr [[FIXLEN_ALLOC]], <vscale x 128 x i64> [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 128 x float> @llvm.masked.gather.nxv128f32.nxv128p0(<vscale x 128 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 128 x i1> shufflevector (<vscale x 128 x i1> insertelement (<vscale x 128 x i1> poison, i1 true, {{i32|i64}} 0), <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer), <vscale x 128 x float> undef)
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
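The new {{( nneg)?}} group lets the same CHECK line accept LLVM 18 output, where a zext may carry the nneg flag when its operand is known to be non-negative (here it is, because the index was masked down to a small positive range). For illustration, both of the following forms now satisfy the pattern; the value names are hypothetical:

; Pre-LLVM-18 output:
;   %idx.wide = zext <vscale x 128 x i32> %idx.masked to <vscale x 128 x i64>
; LLVM 18 output, with the non-negative hint:
;   %idx.wide = zext nneg <vscale x 128 x i32> %idx.masked to <vscale x 128 x i64>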
@@ -110,7 +110,7 @@ entry:
; CHECK-NEXT: store <4 x float> [[ADDEND:%.*]], ptr [[FIXLEN_ALLOC]], align 16
; CHECK-NEXT: [[IDX0:%.*]] = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
; CHECK-NEXT: [[IDX1:%.*]] = and <vscale x 16 x i32> [[IDX0]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 3, {{(i32|i64)}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP0:%.*]] = {{s|z}}ext <vscale x 16 x i32> [[IDX1]] to <vscale x 16 x i64>
; CHECK-NEXT: [[TMP0:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[IDX1]] to <vscale x 16 x i64>
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds float, ptr [[FIXLEN_ALLOC]], <vscale x 16 x i64> [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{(i32|i64)}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x float> undef)
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
@@ -146,15 +146,15 @@ entry:
; CHECK-NEXT: store <4 x float> [[ADDEND:%.*]], ptr [[FIXLEN_ALLOC1]], align 16
; CHECK-NEXT: [[IDX03:%.*]] = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
; CHECK-NEXT: [[IDX14:%.*]] = and <vscale x 16 x i32> [[IDX03]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 3, {{i32|i64}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP0:%.*]] = {{s|z}}ext <vscale x 16 x i32> [[IDX14]] to <vscale x 16 x i64>
; CHECK-NEXT: [[TMP0:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[IDX14]] to <vscale x 16 x i64>
; CHECK-NEXT: [[VEC_ALLOC5:%.*]] = getelementptr inbounds float, ptr [[FIXLEN_ALLOC1]], <vscale x 16 x i64> [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC5]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{i32|i64}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x float> {{(undef|poison)}})
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[EXISTING_ALLOC]], align 16
; CHECK-NEXT: store i32 1, ptr [[EXISTING_ALLOC]], align 16
; CHECK-NEXT: [[V:%.*]] = load <4 x i32>, ptr [[EXISTING_ALLOC]], align 16
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[FIXLEN_ALLOC]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = {{s|z}}ext <vscale x 16 x i32> [[IDX14]] to <vscale x 16 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[IDX14]] to <vscale x 16 x i64>
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds i32, ptr [[FIXLEN_ALLOC]], <vscale x 16 x i64> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{i32|i64}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i32> {{(undef|poison)}})
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]]
@@ -173,7 +173,7 @@ entry:
; CHECK: [[IDX1:%.*]] = and <vscale x 16 x i32> [[IDX0]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 3, {{i32|i64}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
; CHECK: [[SEXT:%.*]] = sext <4 x i1> [[INPUT:%.*]] to <4 x i8>
; CHECK: store <4 x i8> [[SEXT]], ptr [[FIXLEN_MASK_ALLOC]], align 4
; CHECK: [[TMP0:%.*]] = {{s|z}}ext <vscale x 16 x i32> [[IDX1]] to <vscale x 16 x i64>
; CHECK: [[TMP0:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[IDX1]] to <vscale x 16 x i64>
; CHECK: [[VEC_ALLOC:%.*]] = getelementptr inbounds i8, ptr [[FIXLEN_MASK_ALLOC]], <vscale x 16 x i64> [[TMP0]]
; CHECK: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{i32|i64}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> {{(undef|poison)}})
; CHECK: [[BMASK:%.*]] = trunc <vscale x 16 x i8> [[TMP1]] to <vscale x 16 x i1>
@@ -43,7 +43,7 @@ if.end:

; Note that since we just did a lshr 2 on the input of the extend, it doesn't
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[idx2:%.*]] = {{s|z}}ext <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>

; CHECK: [[t1:%.*]] = getelementptr inbounds i8, ptr {{.*}}, <vscale x 16 x i64> [[idx2]]
; CHECK: [[t2:%.*]] = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> [[t1]],
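The note above rests on a simple fact: an lshr clears the sign bit, so sign- and zero-extension of its result are interchangeable, and the check only needs to accept either mnemonic (plus the optional nneg on newer LLVM). A scalar sketch with hypothetical names:

;   %idx1 = lshr i32 %x, 2      ; top two bits are now zero
;   %a = sext i32 %idx1 to i64  ; identical to %b for every %x,
;   %b = zext i32 %idx1 to i64  ; because the sign bit of %idx1 is clear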
@@ -48,7 +48,7 @@ entry:

; Note that since we just did a lshr 1 on the input of the extend, it doesn't
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[sext2:%.*]] = {{s|z}}ext <vscale x 8 x i32> [[idx1]] to <vscale x 8 x i64>
; CHECK: [[sext2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 8 x i32> [[idx1]] to <vscale x 8 x i64>

; CHECK: [[addrs:%.*]] = getelementptr inbounds i8, ptr [[alloc]], <vscale x 8 x i64> [[sext2]]
; CHECK: [[gather:%.*]] = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> [[addrs]],
@@ -37,7 +37,7 @@ define spir_kernel void @do_shuffle_splat(i32* %aptr, <4 x i32>* %bptr, <4 x i32

; Note that since we just did a lshr 2 on the input of the extend, it doesn't
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[idx2:%.*]] = {{s|z}}ext <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>

; CHECK: [[alloc:%.*]] = getelementptr inbounds i32, ptr %{{.*}}, <vscale x 16 x i64> [[idx2]]
; CHECK: [[splat:%.*]] = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> [[alloc]],
119 changes: 62 additions & 57 deletions modules/compiler/vecz/test/lit/llvm/emit_memintrinsics.ll
@@ -65,6 +65,11 @@ declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocaptur

declare i64 @__mux_get_local_id(i32)

; Note: between LLVM 17 and LLVM 18, alignment optimizations were moved out of
; InstCombine into their own pass. We don't run that pass here, which results
; in different alignment values between LLVM versions. Because of that, we
; don't check the alignment of any loads or stores.
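For example, the same store may be emitted with different alignments depending on whether the alignment-inference work ran; the values below are illustrative rather than taken from a specific LLVM version:

;   store i64 %ms64val, ptr %sa, align 16   ; with alignment inference
;   store i64 %ms64val, ptr %sa, align 8    ; without it
; The version-agnostic check therefore stops at the pointer operand:
;   CHECK: store i64 %ms64val, ptr %sa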

; Sanity checks: Make sure the non-vecz entry function is still in place and
; contains memset and memcpy. This is done in order to prevent future bafflement
; in case some pass optimizes them out.
@@ -92,104 +97,104 @@ declare i64 @__mux_get_local_id(i32)

; Check if the generated loads and stores are in place
; Check the stores for the first memset
; CHECK: store i64 %ms64val, ptr %sa, align 16
; CHECK: store i64 %ms64val, ptr %sa
; CHECK: %[[V14:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 8
; CHECK: store i64 %ms64val, ptr %[[V14]], align 8
; CHECK: store i64 %ms64val, ptr %[[V14]]
; CHECK: %[[V15:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 16
; CHECK: store i64 %ms64val, ptr %[[V15]], align {{(8|16)}}
; CHECK: store i64 %ms64val, ptr %[[V15]]
; CHECK: %[[V16:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 24
; CHECK: store i64 %ms64val, ptr %[[V16]], align 8
; CHECK: store i64 %ms64val, ptr %[[V16]]
; CHECK: %[[V17:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 32
; CHECK: store i64 %ms64val, ptr %[[V17]], align 16
; CHECK: store i64 %ms64val, ptr %[[V17]]
; CHECK: %[[V18:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 40
; CHECK: store i64 %ms64val, ptr %[[V18]], align 8
; CHECK: store i64 %ms64val, ptr %[[V18]]
; CHECK: %[[V19:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 48
; CHECK: store i64 %ms64val, ptr %[[V19]], align 16
; CHECK: store i64 %ms64val, ptr %[[V19]]
; CHECK: %[[V20:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 56
; CHECK-EQ14: %[[V20:[0-9]+]] = getelementptr inbounds %struct.S2, %struct.S2* %sa, i64 0, i32 3, i64 8
; CHECK: %[[V21:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 64
; CHECK: %[[V22:[0-9]+]] = getelementptr inbounds i8, ptr %sa, i64 72

; Check the stores for the second memset
; CHECK: store i64 0, ptr addrspace(1) %[[SB_I8AS]], align 16
; CHECK: store i64 0, ptr addrspace(1) %[[SB_I8AS]]
; CHECK: %[[V24:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 8
; CHECK: store i64 0, ptr addrspace(1) %[[V24]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V24]]
; CHECK: %[[V26:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 16
; CHECK: store i64 0, ptr addrspace(1) %[[V26]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V26]]
; CHECK: %[[V28:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 24
; CHECK: store i64 0, ptr addrspace(1) %[[V28]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V28]]
; CHECK: %[[V30:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 32
; CHECK: store i64 0, ptr addrspace(1) %[[V30]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V30]]
; CHECK: %[[V32:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 40
; CHECK: store i64 0, ptr addrspace(1) %[[V32]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V32]]
; CHECK: %[[V33:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 48
; CHECK: store i64 0, ptr addrspace(1) %[[V33]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V33]]
; CHECK: %[[V35T:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 56
; CHECK-EQ14: %[[V35T:[0-9]+]] = getelementptr inbounds %struct.S2, %struct.S2* %sb, i64 0, i32 3, i64 8
; CHECK-EQ14: %[[V35:[0-9]+]] = bitcast i8* %[[V35T]] to i64*
; CHECK-EQ14: %[[SB_I8AS18:.+]] = addrspacecast i64* %[[V35]] to i64 addrspace(1)*
; CHECK: store i64 0, ptr addrspace(1) %[[V35T]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V35T]]
; CHECK: %[[V36:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 64
; CHECK: store i64 0, ptr addrspace(1) %[[V36]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V36]]
; CHECK: %[[V38:[0-9]+]] = getelementptr inbounds i8, ptr addrspace(1) %[[SB_I8AS]], i64 72
; CHECK: store i64 0, ptr addrspace(1) %[[V38]], align 8
; CHECK: store i64 0, ptr addrspace(1) %[[V38]]


; Check the loads and stores for the first memcpy
; CHECK:middle: ; preds = %entry
; CHECK: %[[SA_I822:.+]] = load i64, ptr %sa, align 16
; CHECK: store i64 %[[SA_I822]], ptr addrspace(1) %[[SB_I8AS]], align 16
; CHECK: %[[SA_I824:.+]] = load i64, ptr %[[V14]], align 8
; CHECK: store i64 %[[SA_I824]], ptr addrspace(1) %[[V24]], align 8
; CHECK: %[[SA_I826:.+]] = load i64, ptr %[[V15]], align {{(8|16)}}
; CHECK: store i64 %[[SA_I826]], ptr addrspace(1) %[[V26]], align 8
; CHECK: %[[SA_I828:.+]] = load i64, ptr %[[V16]], align 8
; CHECK: store i64 %[[SA_I828]], ptr addrspace(1) %[[V28]], align 8
; CHECK: %[[SA_I830:.+]] = load i64, ptr %[[V17]], align 16
; CHECK: store i64 %[[SA_I830]], ptr addrspace(1) %[[V30]], align 8
; CHECK: %[[SA_I832:.+]] = load i64, ptr %[[V18]], align 8
; CHECK: store i64 %[[SA_I832]], ptr addrspace(1) %[[V32]], align 8
; CHECK: %[[SA_I834:.+]] = load i64, ptr %[[V19]], align 16
; CHECK: store i64 %[[SA_I834]], ptr addrspace(1) %[[V33]], align 8
; CHECK: %[[SA_I836:.+]] = load i64, ptr %[[V20]], align 8
; CHECK: store i64 %[[SA_I836]], ptr addrspace(1) %[[V35T]], align 8
; CHECK: %[[SA_I838:.+]] = load i64, ptr %[[V21]], align 16
; CHECK: store i64 %[[SA_I838]], ptr addrspace(1) %[[V36]], align 8
; CHECK: %[[SA_I840:.+]] = load i64, ptr %[[V22]], align 8
; CHECK: store i64 %[[SA_I840]], ptr addrspace(1) %[[V38]], align 8
; CHECK: %[[SA_I822:.+]] = load i64, ptr %sa
; CHECK: store i64 %[[SA_I822]], ptr addrspace(1) %[[SB_I8AS]]
; CHECK: %[[SA_I824:.+]] = load i64, ptr %[[V14]]
; CHECK: store i64 %[[SA_I824]], ptr addrspace(1) %[[V24]]
; CHECK: %[[SA_I826:.+]] = load i64, ptr %[[V15]]
; CHECK: store i64 %[[SA_I826]], ptr addrspace(1) %[[V26]]
; CHECK: %[[SA_I828:.+]] = load i64, ptr %[[V16]]
; CHECK: store i64 %[[SA_I828]], ptr addrspace(1) %[[V28]]
; CHECK: %[[SA_I830:.+]] = load i64, ptr %[[V17]]
; CHECK: store i64 %[[SA_I830]], ptr addrspace(1) %[[V30]]
; CHECK: %[[SA_I832:.+]] = load i64, ptr %[[V18]]
; CHECK: store i64 %[[SA_I832]], ptr addrspace(1) %[[V32]]
; CHECK: %[[SA_I834:.+]] = load i64, ptr %[[V19]]
; CHECK: store i64 %[[SA_I834]], ptr addrspace(1) %[[V33]]
; CHECK: %[[SA_I836:.+]] = load i64, ptr %[[V20]]
; CHECK: store i64 %[[SA_I836]], ptr addrspace(1) %[[V35T]]
; CHECK: %[[SA_I838:.+]] = load i64, ptr %[[V21]]
; CHECK: store i64 %[[SA_I838]], ptr addrspace(1) %[[V36]]
; CHECK: %[[SA_I840:.+]] = load i64, ptr %[[V22]]
; CHECK: store i64 %[[SA_I840]], ptr addrspace(1) %[[V38]]

; Check the loads and stores for the second memcpy
; CHECK:end: ; preds = %middle, %entry
; CHECK: %[[SB_I8AS42:.+]] = load i64, ptr addrspace(1) %[[SB_I8AS]], align 16
; CHECK: store i64 %[[SB_I8AS42]], ptr %result2, align 16
; CHECK: %[[SB_I8AS42:.+]] = load i64, ptr addrspace(1) %[[SB_I8AS]]
; CHECK: store i64 %[[SB_I8AS42]], ptr %result2
; CHECK: %[[V42:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 8
; CHECK: %[[SB_I8AS44:.+]] = load i64, ptr addrspace(1) %[[V24]], align 8
; CHECK: store i64 %[[SB_I8AS44]], ptr %[[V42]], align 8
; CHECK: %[[SB_I8AS44:.+]] = load i64, ptr addrspace(1) %[[V24]]
; CHECK: store i64 %[[SB_I8AS44]], ptr %[[V42]]
; CHECK: %[[V43:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 16
; CHECK: %[[SB_I8AS46:.+]] = load i64, ptr addrspace(1) %[[V26]], align 8
; CHECK: store i64 %[[SB_I8AS46]], ptr %[[V43]], align 8
; CHECK: %[[SB_I8AS46:.+]] = load i64, ptr addrspace(1) %[[V26]]
; CHECK: store i64 %[[SB_I8AS46]], ptr %[[V43]]
; CHECK: %[[V44:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 24
; CHECK: %[[SB_I8AS48:.+]] = load i64, ptr addrspace(1) %[[V28]], align 8
; CHECK: store i64 %[[SB_I8AS48]], ptr %[[V44]], align 8
; CHECK: %[[SB_I8AS48:.+]] = load i64, ptr addrspace(1) %[[V28]]
; CHECK: store i64 %[[SB_I8AS48]], ptr %[[V44]]
; CHECK: %[[V45:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 32
; CHECK: %[[SB_I8AS50:.+]] = load i64, ptr addrspace(1) %[[V30]], align 8
; CHECK: store i64 %[[SB_I8AS50]], ptr %[[V45]], align 8
; CHECK: %[[SB_I8AS50:.+]] = load i64, ptr addrspace(1) %[[V30]]
; CHECK: store i64 %[[SB_I8AS50]], ptr %[[V45]]
; CHECK: %[[V46:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 40
; CHECK: %[[SB_I8AS52:.+]] = load i64, ptr addrspace(1) %[[V32]], align 8
; CHECK: store i64 %[[SB_I8AS52]], ptr %[[V46]], align 8
; CHECK: %[[SB_I8AS52:.+]] = load i64, ptr addrspace(1) %[[V32]]
; CHECK: store i64 %[[SB_I8AS52]], ptr %[[V46]]
; CHECK: %[[V47:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 48
; CHECK: %[[SB_I8AS54:.+]] = load i64, ptr addrspace(1) %[[V33]], align 8
; CHECK: store i64 %[[SB_I8AS54]], ptr %[[V47]], align 8
; CHECK: %[[SB_I8AS54:.+]] = load i64, ptr addrspace(1) %[[V33]]
; CHECK: store i64 %[[SB_I8AS54]], ptr %[[V47]]
; CHECK: %[[V48:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 56
; CHECK-EQ14: %[[V48:[0-9]+]] = getelementptr inbounds %struct.S2, %struct.S2* %result2, i64 0, i32 3, i64 8
; CHECK: %[[SB_I8AS56:.+]] = load i64, ptr addrspace(1) %[[V35T]], align 8
; CHECK: store i64 %[[SB_I8AS56]], ptr %[[V48]], align 8
; CHECK: %[[SB_I8AS56:.+]] = load i64, ptr addrspace(1) %[[V35T]]
; CHECK: store i64 %[[SB_I8AS56]], ptr %[[V48]]
; CHECK: %[[V49:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 64
; CHECK: %[[SB_I8AS58:.+]] = load i64, ptr addrspace(1) %[[V36]], align 8
; CHECK: store i64 %[[SB_I8AS58]], ptr %[[V49]], align 8
; CHECK: %[[SB_I8AS58:.+]] = load i64, ptr addrspace(1) %[[V36]]
; CHECK: store i64 %[[SB_I8AS58]], ptr %[[V49]]
; CHECK: %[[V50:[0-9]+]] = getelementptr inbounds i8, ptr %result2, i64 72
; CHECK: %[[SB_I8AS60:.+]] = load i64, ptr addrspace(1) %[[V38]], align 8
; CHECK: store i64 %[[SB_I8AS60]], ptr %[[V50]], align 8
; CHECK: %[[SB_I8AS60:.+]] = load i64, ptr addrspace(1) %[[V38]]
; CHECK: store i64 %[[SB_I8AS60]], ptr %[[V50]]

; End of function
; CHECK: ret void
6 changes: 3 additions & 3 deletions modules/compiler/vecz/test/lit/llvm/uniform_loop.ll
@@ -40,6 +40,6 @@ merge:
}

; CHECK: define spir_kernel void @__vecz_v4_test
; CHECK: loop:
; CHECK: %load = load i32, ptr addrspace(1) %in
; CHECK: store i32 %load, ptr addrspace(1) %slot
; CHECK-NOT: define spir_kernel void @test
; CHECK: %[[LOAD:load.*]] = load i32, ptr addrspace(1) %in
; CHECK: store i32 %[[LOAD]], ptr addrspace(1) %slot
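Switching from the literal %load to a FileCheck capture keeps the test stable if a newer LLVM renames the loaded value. The [[LOAD:load.*]] definition binds whatever name matches the regex, and the later [[LOAD]] use requires that same name again, so output such as the following (a hypothetical rename) still passes:

;   %load1 = load i32, ptr addrspace(1) %in
;   store i32 %load1, ptr addrspace(1) %slot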
