Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MSL: Work around internal compiler error from mulhi. #2392

Merged
merged 1 commit into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
uint _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _20

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_20 _28;
_28._m0 = int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x]);
_28._m1 = mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_20 _28 = spvMulExtended<_20, uint>(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _28._m0;
_8._m0[gl_GlobalInvocationID.x] = _28._m1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
ulong _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _21

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_21 _29;
_29._m0 = long(_5._m0[gl_GlobalInvocationID.x]) * long(_6._m0[gl_GlobalInvocationID.x]);
_29._m1 = mulhi(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_21 _29 = spvMulExtended<_21, ulong>(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _29._m0;
_8._m0[gl_GlobalInvocationID.x] = _29._m1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct SSBOUint
{
uint a;
Expand Down Expand Up @@ -135,44 +143,28 @@ kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1
_106._m1 = select(uint4(1), uint4(0), u.a4 >= u.b4);
u.d4 = _106._m1;
u.c4 = _106._m0;
ResType _116;
_116._m0 = u.a * u.b;
_116._m1 = mulhi(u.a, u.b);
ResType _116 = spvMulExtended<ResType, uint>(u.a, u.b);
u.d = _116._m0;
u.c = _116._m1;
ResType_1 _125;
_125._m0 = u.a2 * u.b2;
_125._m1 = mulhi(u.a2, u.b2);
ResType_1 _125 = spvMulExtended<ResType_1, uint2>(u.a2, u.b2);
u.d2 = _125._m0;
u.c2 = _125._m1;
ResType_2 _134;
_134._m0 = u.a3 * u.b3;
_134._m1 = mulhi(u.a3, u.b3);
ResType_2 _134 = spvMulExtended<ResType_2, uint3>(u.a3, u.b3);
u.d3 = _134._m0;
u.c3 = _134._m1;
ResType_3 _143;
_143._m0 = u.a4 * u.b4;
_143._m1 = mulhi(u.a4, u.b4);
ResType_3 _143 = spvMulExtended<ResType_3, uint4>(u.a4, u.b4);
u.d4 = _143._m0;
u.c4 = _143._m1;
ResType_4 _160;
_160._m0 = i.a * i.b;
_160._m1 = mulhi(i.a, i.b);
ResType_4 _160 = spvMulExtended<ResType_4, int>(i.a, i.b);
i.d = _160._m0;
i.c = _160._m1;
ResType_5 _171;
_171._m0 = i.a2 * i.b2;
_171._m1 = mulhi(i.a2, i.b2);
ResType_5 _171 = spvMulExtended<ResType_5, int2>(i.a2, i.b2);
i.d2 = _171._m0;
i.c2 = _171._m1;
ResType_6 _182;
_182._m0 = i.a3 * i.b3;
_182._m1 = mulhi(i.a3, i.b3);
ResType_6 _182 = spvMulExtended<ResType_6, int3>(i.a3, i.b3);
i.d3 = _182._m0;
i.c3 = _182._m1;
ResType_7 _193;
_193._m0 = i.a4 * i.b4;
_193._m1 = mulhi(i.a4, i.b4);
ResType_7 _193 = spvMulExtended<ResType_7, int4>(i.a4, i.b4);
i.d4 = _193._m0;
i.c4 = _193._m1;
}
Expand Down
12 changes: 9 additions & 3 deletions reference/shaders-msl/asm/comp/uint_smulextended.asm.comp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
uint _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _20

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_20 _28;
_28._m0 = int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x]);
_28._m1 = mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_20 _28 = spvMulExtended<_20, uint>(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _28._m0;
_8._m0[gl_GlobalInvocationID.x] = _28._m1;
}
Expand Down
12 changes: 9 additions & 3 deletions reference/shaders-msl/asm/comp/ulong_smulextended.asm.msl23.comp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
ulong _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _21

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_21 _29;
_29._m0 = long(_5._m0[gl_GlobalInvocationID.x]) * long(_6._m0[gl_GlobalInvocationID.x]);
_29._m1 = mulhi(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_21 _29 = spvMulExtended<_21, ulong>(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _29._m0;
_8._m0[gl_GlobalInvocationID.x] = _29._m1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct SSBOUint
{
uint a;
Expand Down Expand Up @@ -135,44 +143,28 @@ kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1
_106._m1 = select(uint4(1), uint4(0), u.a4 >= u.b4);
u.d4 = _106._m1;
u.c4 = _106._m0;
ResType _116;
_116._m0 = u.a * u.b;
_116._m1 = mulhi(u.a, u.b);
ResType _116 = spvMulExtended<ResType, uint>(u.a, u.b);
u.d = _116._m0;
u.c = _116._m1;
ResType_1 _125;
_125._m0 = u.a2 * u.b2;
_125._m1 = mulhi(u.a2, u.b2);
ResType_1 _125 = spvMulExtended<ResType_1, uint2>(u.a2, u.b2);
u.d2 = _125._m0;
u.c2 = _125._m1;
ResType_2 _134;
_134._m0 = u.a3 * u.b3;
_134._m1 = mulhi(u.a3, u.b3);
ResType_2 _134 = spvMulExtended<ResType_2, uint3>(u.a3, u.b3);
u.d3 = _134._m0;
u.c3 = _134._m1;
ResType_3 _143;
_143._m0 = u.a4 * u.b4;
_143._m1 = mulhi(u.a4, u.b4);
ResType_3 _143 = spvMulExtended<ResType_3, uint4>(u.a4, u.b4);
u.d4 = _143._m0;
u.c4 = _143._m1;
ResType_4 _160;
_160._m0 = i.a * i.b;
_160._m1 = mulhi(i.a, i.b);
ResType_4 _160 = spvMulExtended<ResType_4, int>(i.a, i.b);
i.d = _160._m0;
i.c = _160._m1;
ResType_5 _171;
_171._m0 = i.a2 * i.b2;
_171._m1 = mulhi(i.a2, i.b2);
ResType_5 _171 = spvMulExtended<ResType_5, int2>(i.a2, i.b2);
i.d2 = _171._m0;
i.c2 = _171._m1;
ResType_6 _182;
_182._m0 = i.a3 * i.b3;
_182._m1 = mulhi(i.a3, i.b3);
ResType_6 _182 = spvMulExtended<ResType_6, int3>(i.a3, i.b3);
i.d3 = _182._m0;
i.c3 = _182._m1;
ResType_7 _193;
_193._m0 = i.a4 * i.b4;
_193._m1 = mulhi(i.a4, i.b4);
ResType_7 _193 = spvMulExtended<ResType_7, int4>(i.a4, i.b4);
i.d4 = _193._m0;
i.c4 = _193._m1;
}
Expand Down
20 changes: 17 additions & 3 deletions spirv_msl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7599,6 +7599,16 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;

case SPVFuncImplMulExtended:
// Compiler may hit an internal error with mulhi, but doesn't when encapsulated for some reason.
statement("template<typename T, typename U, typename V>");
statement("[[clang::optnone]] T spvMulExtended(V l, V r)");
begin_scope();
statement("return T{U(l * r), U(mulhi(l, r))};");
end_scope();
statement("");
break;

default:
break;
}
Expand Down Expand Up @@ -9550,13 +9560,13 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t op0 = ops[2];
uint32_t op1 = ops[3];
auto &type = get<SPIRType>(result_type);
auto &op_type = expression_type(op0);
auto input_type = opcode == OpSMulExtended ? int_type : uint_type;
string cast_op0, cast_op1;

binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false);
emit_uninitialized_temporary_expression(result_type, result_id);
statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", cast_op0, " * ", cast_op1, ";");
statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", cast_op0, ", ", cast_op1, ");");
auto expr = join("spvMulExtended<", type_to_glsl(type), ", ", type_to_glsl(op_type), ">(", cast_op0, ", ", cast_op1, ")");
emit_op(result_type, result_id, expr, true);
break;
}

Expand Down Expand Up @@ -17708,6 +17718,10 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpSUDotAccSat:
return SPVFuncImplReduceAdd;

case OpSMulExtended:
case OpUMulExtended:
return SPVFuncImplMulExtended;

default:
break;
}
Expand Down
3 changes: 2 additions & 1 deletion spirv_msl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,8 @@ class CompilerMSL : public CompilerGLSL
SPVFuncImplPaddedStd140,
SPVFuncImplReduceAdd,
SPVFuncImplImageFence,
SPVFuncImplTextureCast
SPVFuncImplTextureCast,
SPVFuncImplMulExtended,
};

// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
Expand Down
Loading