Skip to content

Commit

Permalink
MSL: Consider that function/private variables can be block-like.
Browse files Browse the repository at this point in the history
Handles a special case with array copies.
The implementation of this fix is not perfect, but should be good
enough for time being.
  • Loading branch information
HansKristian-Work committed Aug 23, 2021
1 parent 840d448 commit 2eea6a5
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];

thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}

device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}

constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}

threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};

struct _3
{
float _m0[4];
};

template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

template<typename T, uint A>
inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
{
for (uint i = 0; i < A; i++)
{
dst[i] = src[i];
}
}

fragment void main0()
{
spvUnsafeArray<float, 4> _20;
_20[0u] = 0.0;
_20[1u] = 0.0;
_20[2u] = 0.0;
_20[3u] = 0.0;
_3 _19;
spvArrayCopyFromStackToStack1(_19._m0, _20.elements);
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
; SPIR-V
; Version: 1.3
; Generator: Google rspirv; 0
; Bound: 43
; Schema: 0
OpCapability ImageQuery
OpCapability Int8
OpCapability RuntimeDescriptorArray
OpCapability StorageImageWriteWithoutFormat
OpCapability Shader
OpCapability VulkanMemoryModel
OpExtension "SPV_EXT_descriptor_indexing"
OpExtension "SPV_KHR_vulkan_memory_model"
OpMemoryModel Logical Vulkan
OpEntryPoint Fragment %1 "main"
OpExecutionMode %1 OriginUpperLeft
OpDecorate %2 ArrayStride 4
OpMemberDecorate %3 0 Offset 0
%4 = OpTypeInt 32 0
%5 = OpTypeFloat 32
%6 = OpTypePointer Function %5
%7 = OpTypeVoid
%8 = OpTypeFunction %7
%9 = OpConstant %4 0
%10 = OpConstant %4 1
%11 = OpConstant %4 2
%12 = OpConstant %4 4
%13 = OpConstant %4 3
%14 = OpConstant %5 0
%2 = OpTypeArray %5 %12
%15 = OpTypePointer Function %2
%16 = OpTypeFunction %7 %15
%3 = OpTypeStruct %2
%17 = OpTypePointer Function %3
%1 = OpFunction %7 None %8
%31 = OpLabel
%33 = OpVariable %17 Function
%34 = OpVariable %15 Function
%39 = OpAccessChain %6 %34 %9
OpStore %39 %14
%40 = OpAccessChain %6 %34 %10
OpStore %40 %14
%41 = OpAccessChain %6 %34 %11
OpStore %41 %14
%42 = OpAccessChain %6 %34 %13
OpStore %42 %14
%37 = OpAccessChain %15 %33 %9
OpCopyMemory %37 %34
OpReturn
OpFunctionEnd
14 changes: 14 additions & 0 deletions spirv_msl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8511,13 +8511,27 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t r

// Special considerations for stage IO variables.
// If the variable is actually backed by non-user visible device storage, we use array templates for those.
//
// Another special consideration is given to thread local variables which happen to have Offset decorations
// applied to them. Block-like types do not use array templates, so we need to force POD path if we detect
// these scenarios. This check isn't perfect since it would be technically possible to mix and match these things,
// and for a fully correct solution we might have to track array template state through access chains as well,
// but for all reasonable use cases, this should suffice.
// This special case should also only apply to Function/Private storage classes.
// We should not check backing variable for temporaries.
auto *lhs_var = maybe_get_backing_variable(lhs_id);
if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
lhs_is_array_template = true;
else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) &&
type_is_block_like(get<SPIRType>(lhs_var->basetype)))
lhs_is_array_template = false;

auto *rhs_var = maybe_get_backing_variable(rhs_id);
if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
rhs_is_array_template = true;
else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) &&
type_is_block_like(get<SPIRType>(rhs_var->basetype)))
rhs_is_array_template = false;

// If threadgroup storage qualifiers are *not* used:
// Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
Expand Down

0 comments on commit 2eea6a5

Please sign in to comment.