Skip to content

Commit

Permalink
skinned_meshlets port to VK_EXT_mesh_shader... almost there!
Browse files Browse the repository at this point in the history
Just not quite yet for the packed/redirected case.
  • Loading branch information
johannesugb committed May 24, 2023
1 parent fffbd82 commit 32d0f2b
Show file tree
Hide file tree
Showing 11 changed files with 607 additions and 216 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,11 @@ layout (location = 0) in PerVertexData
} v_in;

layout(push_constant) uniform PushConstants {
uint highlightMeshlets;
bool mHighlightMeshlets;
int mVisibleMeshletIndexFrom;
int mVisibleMeshletIndexTo;
} pushConstants;


layout (location = 0) out vec4 fs_out;

void main()
Expand All @@ -92,7 +93,7 @@ void main()
vec3 illum = vec3(ambient) + diffuse * max(0.0, dot(normalize(v_in.normalWS), toLight));
color *= illum;

if(pushConstants.highlightMeshlets == 1) {
if(pushConstants.mHighlightMeshlets) {
color = mix(color, v_in.color, 0.5);
}

Expand Down
134 changes: 70 additions & 64 deletions examples/skinned_meshlets/shaders/meshlet.mesh
Original file line number Diff line number Diff line change
@@ -1,39 +1,43 @@
#version 460
#extension GL_EXT_shader_16bit_storage: require
#extension GL_EXT_shader_8bit_storage: require
#extension GL_NV_mesh_shader : require
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_nonuniform_qualifier : require
#extension GL_GOOGLE_include_directive : enable
#extension GL_ARB_shader_draw_parameters: require
#extension GL_EXT_debug_printf : enable
#include "cpu_gpu_shared_config.h"
#include "glsl_helpers.glsl"


layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
layout (constant_id = 0) const uint NUM_TASK_SHADER_INVOCATIONS = 1; // <- Expected to be set to 32 for Nvidia on host side.
layout (constant_id = 1) const uint NUM_MESH_SHADER_INVOCATIONS = 1; // <- Expected to be set to 32 for Nvidia on host side.
layout(local_size_x_id = 1, local_size_y = 1, local_size_z = 1) in;
// The max_vertices and max_primitives numbers have been recommended by Nvidia:
layout(triangles, max_vertices = 64, max_primitives = 126) out;

layout(set = 0, binding = 1) uniform CameraTransform
{
mat4 mViewProjMatrix;
} ubo;

taskPayloadSharedEXT uint meshletIndices[NUM_TASK_SHADER_INVOCATIONS];

//-------------------------------------
//////////////////////////////////////////////////////////////////////
// Meshlet data passed from the host side:
#if !USE_REDIRECTED_GPU_DATA
struct meshlet
{
uint vertices[64];
uint8_t indices[378];
uint8_t vertex_count;
uint8_t triangle_count;
uint mVertices[64];
uint8_t mIndices[378]; // 126 triangles * 3 indices
uint8_t mVertexCount;
uint8_t mTriangleCount;
};
#else
struct meshlet
{
uint data_offset;
uint8_t vertex_count;
uint8_t triangle_count;
uint mDataOffset;
uint8_t mVertexCount;
uint8_t mTriangleCount;
};
#endif

Expand All @@ -47,11 +51,6 @@ struct extended_meshlet
meshlet mGeometry;
};

in taskNV block
{
uint meshletIndices[32];
};

layout(set = 2, binding = 0) buffer BoneMatrices
{
mat4 mat[]; // length of #bones
Expand All @@ -65,11 +64,11 @@ layout(set = 3, binding = 4) uniform usamplerBuffer meshletVertexIndices[];
#endif
layout(set = 3, binding = 5) uniform usamplerBuffer boneIndicesBuffers[];
layout(set = 3, binding = 6) uniform samplerBuffer boneWeightsBuffers[];
layout(set = 4, binding = 0) buffer MeshletsBuffer { extended_meshlet values[]; } meshletsBuffer;
layout(set = 4, binding = 0) buffer MeshletsBuffer { extended_meshlet mValues[]; } meshletsBuffer;
//-------------------------------------

// Mesh shader output block.
//
//////////////////////////////////////////////////////////////////////
// Mesh shader output block:
layout (location = 0) out PerVertexData
{
vec3 positionWS;
Expand All @@ -78,49 +77,49 @@ layout (location = 0) out PerVertexData
flat int materialIndex;
vec3 color;
} v_out[]; // [max_vertices]


// Color table for drawing each meshlet with a different color.
//

// Color table for drawing each meshlet with a different color:
#define MAX_COLORS 10
vec3 meshletcolors[MAX_COLORS] = {
vec3(1,0,0),
vec3(0,1,0),
vec3(0,0,1),
vec3(1,1,0),
vec3(1,0,1),
vec3(0,1,1),
vec3(1,0.5,0),
vec3(0.5,1,0),
vec3(0,0.5,1),
vec3(1,1,1)
vec3(0.9, 0.1, 0.1),
vec3(0.7, 0.2, 0.2),
vec3(0.5, 0.3, 0.3),
vec3(0.3, 0.4, 0.5),
vec3(0.2, 0.5, 0.7),
vec3(0.1, 0.7, 0.9),
vec3(0.3, 0.6, 0.8),
vec3(0.6, 0.8, 0.9),
vec3(0.8, 0.9, 0.95),
vec3(1.0, 1.0, 1.0)
};

//////////////////////////////////////////////////////////////////////
// Mesh Shader Main:
void main()
{
uint mshIdx = meshletIndices[gl_WorkGroupID.x];
uint thread_id = gl_LocalInvocationID.x;
uint meshletIndex = meshletIndices[gl_WorkGroupID.x];
uint invocationId = gl_LocalInvocationID.x;

// Get all the meshlet data:
uint vertexCount = uint(meshletsBuffer.values[mshIdx].mGeometry.vertex_count);
uint triangleCount = uint(meshletsBuffer.values[mshIdx].mGeometry.triangle_count);
uint materialIndex = meshletsBuffer.values[mshIdx].mMaterialIndex;
mat4 transformationMatrix = meshletsBuffer.values[mshIdx].mTransformationMatrix;
uint vertexCount = uint(meshletsBuffer.mValues[meshletIndex].mGeometry.mVertexCount);
uint triangleCount = uint(meshletsBuffer.mValues[meshletIndex].mGeometry.mTriangleCount);
uint materialIndex = meshletsBuffer.mValues[meshletIndex].mMaterialIndex;
mat4 transformationMatrix = meshletsBuffer.mValues[meshletIndex].mTransformationMatrix;

uint modelIdx = meshletsBuffer.values[mshIdx].mModelIndex;
uint texelBufferIndex = meshletsBuffer.values[mshIdx].mTexelBufferIndex;
uint modelIdx = meshletsBuffer.mValues[meshletIndex].mModelIndex;
uint texelBufferIndex = meshletsBuffer.mValues[meshletIndex].mTexelBufferIndex;
#if USE_REDIRECTED_GPU_DATA
uint indexOffset = uint(meshletsBuffer.values[mshIdx].mGeometry.data_offset + vertexCount);
uint indexOffset = uint(meshletsBuffer.mValues[meshletIndex].mGeometry.mDataOffset + vertexCount);
#endif

// Step by 32 as we have that many threads but potentially more vertices to calculate:
for (uint i = thread_id; i < vertexCount; i+=32)
// Step by NUM_MESH_SHADER_INVOCATIONS as we have that many threads but potentially more vertices to calculate:
for (uint i = invocationId; i < vertexCount; i += NUM_MESH_SHADER_INVOCATIONS)
{
// Get the vertex index:
#if !USE_REDIRECTED_GPU_DATA
uint vi = meshletsBuffer.values[mshIdx].mGeometry.vertices[i];
uint vi = meshletsBuffer.mValues[meshletIndex].mGeometry.mVertices[i];
#else
uint vi = texelFetch(meshletVertexIndices[texelBufferIndex], int(meshletsBuffer.values[mshIdx].mGeometry.data_offset + i)).x;
// Redirected case: fetch the vertex index from this meshlet's range (starting at mDataOffset) in the vertex-indices texel buffer:
uint vi = texelFetch(meshletVertexIndices[texelBufferIndex], int(meshletsBuffer.mValues[meshletIndex].mGeometry.mDataOffset + i)).x;
#endif
// Get vertex data from the appropriate texel buffers and vertex indices:
vec4 posMshSp = vec4(texelFetch(positionBuffers[texelBufferIndex], int(vi)).xyz, 1.0);
Expand Down Expand Up @@ -153,40 +152,47 @@ void main()
vec4 posCS = ubo.mViewProjMatrix * posWS;

// Set the vertex position:
gl_MeshVerticesNV[i].gl_Position = posCS;
gl_MeshVerticesEXT[i].gl_Position = posCS;

// Set the per vertex data for the fragment shader:
v_out[i].positionWS = posWS.xyz;
v_out[i].normalWS = mat3(transformationMatrix) * aniNrm;
v_out[i].texCoord = texelFetch(texCoordsBuffers[texelBufferIndex], int(vi)).st;
v_out[i].materialIndex = int(materialIndex);
v_out[i].color = meshletcolors[mshIdx % MAX_COLORS];
v_out[i].color = meshletcolors[meshletIndex % MAX_COLORS];
}

// Only set once:
if(thread_id == 0)
{
gl_PrimitiveCountNV = triangleCount;
}
// Sets the actual output size of the primitives and vertices that the
// mesh shader workgroup will emit upon completion:
SetMeshOutputsEXT(vertexCount, triangleCount);
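// ^ Note: About the placement of SetMeshOutputsEXT, the spec says:
//
// > The arguments are taken from the first invocation in each workgroup.
// > Any invocation must execute this instruction no more than once and
// > under uniform control flow. There must not be any control flow path
// > to an output write that is not preceded by this instruction.
//
// NOTE(review): The vertex loop above already writes gl_MeshVerticesEXT[i] and v_out[i]
// before this call is reached, which appears to conflict with the last quoted sentence.
// The call should probably be moved in front of the vertex loop -- TODO confirm.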

// Write the indices for the vertices:
#if !USE_REDIRECTED_GPU_DATA
// write for each triangle, also note the 32 stepping
for (uint i = thread_id; i < triangleCount; i+=32)
// Write for each triangle, also note the NUM_MESH_SHADER_INVOCATIONS stepping
for (uint i = invocationId; i < triangleCount; i += NUM_MESH_SHADER_INVOCATIONS)
{
gl_PrimitiveIndicesNV[i*3 + 0] = uint(meshletsBuffer.values[mshIdx].mGeometry.indices[i * 3 + 0]);
gl_PrimitiveIndicesNV[i*3 + 1] = uint(meshletsBuffer.values[mshIdx].mGeometry.indices[i * 3 + 1]);
gl_PrimitiveIndicesNV[i*3 + 2] = uint(meshletsBuffer.values[mshIdx].mGeometry.indices[i * 3 + 2]);
// Write the indices for the vertices:
gl_PrimitiveTriangleIndicesEXT[i] = uvec3(
meshletsBuffer.mValues[meshletIndex].mGeometry.mIndices[i * 3 + 0],
meshletsBuffer.mValues[meshletIndex].mGeometry.mIndices[i * 3 + 1],
meshletsBuffer.mValues[meshletIndex].mGeometry.mIndices[i * 3 + 2]
);
}
#else // USE_REDIRECTED_GPU_DATA
// Here we have the indices packed into an uint, so we can write each of the uints at once:
uint indexGroupCount = (triangleCount * 3 + 3) / 4; // need to calculate how many packed uints we have
for (uint i = thread_id; i < indexGroupCount; i += 32)
// Write for each triangle, also note the NUM_MESH_SHADER_INVOCATIONS stepping.
// The indices are stored as four 8-bit values per uint, starting at indexOffset. Hence, the
// three indices of triangle i are the flat entries 3*i+0 .. 3*i+2 of that byte list; they are
// generally not aligned to one uint and can straddle two consecutive uints.
// NOTE(review): assumes lowest-byte-first order within each uint, i.e. the same layout that
//               writePackedPrimitiveIndices4x8NV consumed -- TODO confirm against host-side packing.
for (uint i = invocationId; i < triangleCount; i += NUM_MESH_SHADER_INVOCATIONS)
{
    uvec3 triangleIndices;
    for (uint c = 0; c < 3; ++c)
    {
        // Position of this index within the meshlet's flat list of 8-bit indices:
        uint flatIndex = i * 3 + c;
        // Fetch the uint which contains it (four 8-bit indices per uint):
        uint packedWord = texelFetch(meshletVertexIndices[texelBufferIndex], int(indexOffset + (flatIndex >> 2))).x;
        // Extract the byte which belongs to this index:
        triangleIndices[c] = (packedWord >> ((flatIndex & 3u) * 8u)) & 0xFFu;
    }
    // Write the unpacked indices for the vertices:
    gl_PrimitiveTriangleIndicesEXT[i] = triangleIndices;
}
#endif
}

Loading

0 comments on commit 32d0f2b

Please sign in to comment.