Skip to content

Commit

Permalink
MSL: Work around internal compiler error from mulhi.
Browse files Browse the repository at this point in the history
  • Loading branch information
squidbus committed Oct 14, 2024
1 parent 36e5456 commit 80cdb5e
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 64 deletions.
12 changes: 9 additions & 3 deletions reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
uint _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _20

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_20 _28;
_28._m0 = int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x]);
_28._m1 = mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_20 _28 = spvMulExtended<_20, uint>(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _28._m0;
_8._m0[gl_GlobalInvocationID.x] = _28._m1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
ulong _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _21

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_21 _29;
_29._m0 = long(_5._m0[gl_GlobalInvocationID.x]) * long(_6._m0[gl_GlobalInvocationID.x]);
_29._m1 = mulhi(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_21 _29 = spvMulExtended<_21, ulong>(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _29._m0;
_8._m0[gl_GlobalInvocationID.x] = _29._m1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct SSBOUint
{
uint a;
Expand Down Expand Up @@ -135,44 +143,28 @@ kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1
_106._m1 = select(uint4(1), uint4(0), u.a4 >= u.b4);
u.d4 = _106._m1;
u.c4 = _106._m0;
ResType _116;
_116._m0 = u.a * u.b;
_116._m1 = mulhi(u.a, u.b);
ResType _116 = spvMulExtended<ResType, uint>(u.a, u.b);
u.d = _116._m0;
u.c = _116._m1;
ResType_1 _125;
_125._m0 = u.a2 * u.b2;
_125._m1 = mulhi(u.a2, u.b2);
ResType_1 _125 = spvMulExtended<ResType_1, uint2>(u.a2, u.b2);
u.d2 = _125._m0;
u.c2 = _125._m1;
ResType_2 _134;
_134._m0 = u.a3 * u.b3;
_134._m1 = mulhi(u.a3, u.b3);
ResType_2 _134 = spvMulExtended<ResType_2, uint3>(u.a3, u.b3);
u.d3 = _134._m0;
u.c3 = _134._m1;
ResType_3 _143;
_143._m0 = u.a4 * u.b4;
_143._m1 = mulhi(u.a4, u.b4);
ResType_3 _143 = spvMulExtended<ResType_3, uint4>(u.a4, u.b4);
u.d4 = _143._m0;
u.c4 = _143._m1;
ResType_4 _160;
_160._m0 = i.a * i.b;
_160._m1 = mulhi(i.a, i.b);
ResType_4 _160 = spvMulExtended<ResType_4, int>(i.a, i.b);
i.d = _160._m0;
i.c = _160._m1;
ResType_5 _171;
_171._m0 = i.a2 * i.b2;
_171._m1 = mulhi(i.a2, i.b2);
ResType_5 _171 = spvMulExtended<ResType_5, int2>(i.a2, i.b2);
i.d2 = _171._m0;
i.c2 = _171._m1;
ResType_6 _182;
_182._m0 = i.a3 * i.b3;
_182._m1 = mulhi(i.a3, i.b3);
ResType_6 _182 = spvMulExtended<ResType_6, int3>(i.a3, i.b3);
i.d3 = _182._m0;
i.c3 = _182._m1;
ResType_7 _193;
_193._m0 = i.a4 * i.b4;
_193._m1 = mulhi(i.a4, i.b4);
ResType_7 _193 = spvMulExtended<ResType_7, int4>(i.a4, i.b4);
i.d4 = _193._m0;
i.c4 = _193._m1;
}
Expand Down
12 changes: 9 additions & 3 deletions reference/shaders-msl/asm/comp/uint_smulextended.asm.comp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
uint _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _20

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_20 _28;
_28._m0 = int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x]);
_28._m1 = mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_20 _28 = spvMulExtended<_20, uint>(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _28._m0;
_8._m0[gl_GlobalInvocationID.x] = _28._m1;
}
Expand Down
12 changes: 9 additions & 3 deletions reference/shaders-msl/asm/comp/ulong_smulextended.asm.msl23.comp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct _4
{
ulong _m0[1];
Expand All @@ -16,9 +24,7 @@ struct _21

kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_21 _29;
_29._m0 = long(_5._m0[gl_GlobalInvocationID.x]) * long(_6._m0[gl_GlobalInvocationID.x]);
_29._m1 = mulhi(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_21 _29 = spvMulExtended<_21, ulong>(long(_5._m0[gl_GlobalInvocationID.x]), long(_6._m0[gl_GlobalInvocationID.x]));
_7._m0[gl_GlobalInvocationID.x] = _29._m0;
_8._m0[gl_GlobalInvocationID.x] = _29._m1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, typename U, typename V>
[[clang::optnone]] T spvMulExtended(V l, V r)
{
return T{U(l * r), U(mulhi(l, r))};
}

struct SSBOUint
{
uint a;
Expand Down Expand Up @@ -135,44 +143,28 @@ kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1
_106._m1 = select(uint4(1), uint4(0), u.a4 >= u.b4);
u.d4 = _106._m1;
u.c4 = _106._m0;
ResType _116;
_116._m0 = u.a * u.b;
_116._m1 = mulhi(u.a, u.b);
ResType _116 = spvMulExtended<ResType, uint>(u.a, u.b);
u.d = _116._m0;
u.c = _116._m1;
ResType_1 _125;
_125._m0 = u.a2 * u.b2;
_125._m1 = mulhi(u.a2, u.b2);
ResType_1 _125 = spvMulExtended<ResType_1, uint2>(u.a2, u.b2);
u.d2 = _125._m0;
u.c2 = _125._m1;
ResType_2 _134;
_134._m0 = u.a3 * u.b3;
_134._m1 = mulhi(u.a3, u.b3);
ResType_2 _134 = spvMulExtended<ResType_2, uint3>(u.a3, u.b3);
u.d3 = _134._m0;
u.c3 = _134._m1;
ResType_3 _143;
_143._m0 = u.a4 * u.b4;
_143._m1 = mulhi(u.a4, u.b4);
ResType_3 _143 = spvMulExtended<ResType_3, uint4>(u.a4, u.b4);
u.d4 = _143._m0;
u.c4 = _143._m1;
ResType_4 _160;
_160._m0 = i.a * i.b;
_160._m1 = mulhi(i.a, i.b);
ResType_4 _160 = spvMulExtended<ResType_4, int>(i.a, i.b);
i.d = _160._m0;
i.c = _160._m1;
ResType_5 _171;
_171._m0 = i.a2 * i.b2;
_171._m1 = mulhi(i.a2, i.b2);
ResType_5 _171 = spvMulExtended<ResType_5, int2>(i.a2, i.b2);
i.d2 = _171._m0;
i.c2 = _171._m1;
ResType_6 _182;
_182._m0 = i.a3 * i.b3;
_182._m1 = mulhi(i.a3, i.b3);
ResType_6 _182 = spvMulExtended<ResType_6, int3>(i.a3, i.b3);
i.d3 = _182._m0;
i.c3 = _182._m1;
ResType_7 _193;
_193._m0 = i.a4 * i.b4;
_193._m1 = mulhi(i.a4, i.b4);
ResType_7 _193 = spvMulExtended<ResType_7, int4>(i.a4, i.b4);
i.d4 = _193._m0;
i.c4 = _193._m1;
}
Expand Down
20 changes: 17 additions & 3 deletions spirv_msl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7599,6 +7599,16 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;

case SPVFuncImplMulExtended:
// Compiler may hit an internal error with mulhi, but doesn't when encapsulated for some reason.
statement("template<typename T, typename U, typename V>");
statement("[[clang::optnone]] T spvMulExtended(V l, V r)");
begin_scope();
statement("return T{U(l * r), U(mulhi(l, r))};");
end_scope();
statement("");
break;

default:
break;
}
Expand Down Expand Up @@ -9550,13 +9560,13 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t op0 = ops[2];
uint32_t op1 = ops[3];
auto &type = get<SPIRType>(result_type);
auto &op_type = expression_type(op0);
auto input_type = opcode == OpSMulExtended ? int_type : uint_type;
string cast_op0, cast_op1;

binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false);
emit_uninitialized_temporary_expression(result_type, result_id);
statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", cast_op0, " * ", cast_op1, ";");
statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", cast_op0, ", ", cast_op1, ");");
auto expr = join("spvMulExtended<", type_to_glsl(type), ", ", type_to_glsl(op_type), ">(", cast_op0, ", ", cast_op1, ")");
emit_op(result_type, result_id, expr, true);
break;
}

Expand Down Expand Up @@ -17708,6 +17718,10 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpSUDotAccSat:
return SPVFuncImplReduceAdd;

case OpSMulExtended:
case OpUMulExtended:
return SPVFuncImplMulExtended;

default:
break;
}
Expand Down
3 changes: 2 additions & 1 deletion spirv_msl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,8 @@ class CompilerMSL : public CompilerGLSL
SPVFuncImplPaddedStd140,
SPVFuncImplReduceAdd,
SPVFuncImplImageFence,
SPVFuncImplTextureCast
SPVFuncImplTextureCast,
SPVFuncImplMulExtended,
};

// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
Expand Down

0 comments on commit 80cdb5e

Please sign in to comment.