Skip to content

Commit

Permalink
Ported skinned_meshlets to VK_EXT_mesh_shader
Browse files Browse the repository at this point in the history
  • Loading branch information
johannesugb committed May 27, 2023
1 parent 32d0f2b commit 4c6e8c9
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 33 deletions.
2 changes: 1 addition & 1 deletion examples/skinned_meshlets/shaders/cpu_gpu_shared_config.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#ifndef CPU_GPU_SHARED_CONFIG_H
#define CPU_GPU_SHARED_CONFIG_H

#define USE_REDIRECTED_GPU_DATA 0
#define USE_REDIRECTED_GPU_DATA 1

#endif // CPU_GPU_SHARED_CONFIG_H
15 changes: 8 additions & 7 deletions examples/skinned_meshlets/shaders/meshlet.mesh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ layout(set = 3, binding = 0) uniform samplerBuffer positionBuffers[];
layout(set = 3, binding = 2) uniform samplerBuffer normalBuffers[];
layout(set = 3, binding = 3) uniform samplerBuffer texCoordsBuffers[];
#if USE_REDIRECTED_GPU_DATA
layout(set = 3, binding = 4) uniform usamplerBuffer meshletVertexIndices[];
layout(std430, set = 3, binding = 4) buffer IndicesBuffer { uint mIndices[]; } indicesBuffers[];
layout(std430, set = 3, binding = 4) buffer IndicesBuffer_u8 { uint8_t mIndices[]; } indicesBuffers_u8[];
#endif
layout(set = 3, binding = 5) uniform usamplerBuffer boneIndicesBuffers[];
layout(set = 3, binding = 6) uniform samplerBuffer boneWeightsBuffers[];
Expand Down Expand Up @@ -119,7 +120,7 @@ void main()
#if !USE_REDIRECTED_GPU_DATA
uint vi = meshletsBuffer.mValues[meshletIndex].mGeometry.mVertices[i];
#else
uint vi = texelFetch(meshletVertexIndices[texelBufferIndex], int(meshletsBuffer.values[meshletIndex].mGeometry.mDataOffset + i)).x;
uint vi = uint(indicesBuffers[texelBufferIndex].mIndices[int(meshletsBuffer.mValues[meshletIndex].mGeometry.mDataOffset + i)]);
#endif
// Get vertex data from the appropriate texel buffers and vertex indices:
vec4 posMshSp = vec4(texelFetch(positionBuffers[texelBufferIndex], int(vi)).xyz, 1.0);
Expand Down Expand Up @@ -185,14 +186,14 @@ void main()
);
}
#else // USE_REDIRECTED_GPU_DATA
// Here we have the indices packed into an uint, so we can write each of the uints at once:
uint indexGroupCount = (triangleCount * 3 + 3) / 4; // need to calculate how many packed uints we have
// Write for each triangle, also note the NUM_MESH_SHADER_INVOCATIONS stepping
for (uint i = invocationId; i < triangleCount; i += NUM_MESH_SHADER_INVOCATIONS)
{
uint index = uint(texelFetch(meshletVertexIndices[texelBufferIndex], int(indexOffset + i))).x;
// Unpack and then write the indices for the vertices:
gl_PrimitiveTriangleIndicesEXT[i] = uvec3((index >> 16) & 0xFF, (index >> 8) & 0xFF, (index >> 0) & 0xFF);
gl_PrimitiveTriangleIndicesEXT[i] = uvec3(
uint(indicesBuffers_u8[texelBufferIndex].mIndices[int(indexOffset*4 + i*3 + 0)]),
uint(indicesBuffers_u8[texelBufferIndex].mIndices[int(indexOffset*4 + i*3 + 1)]),
uint(indicesBuffers_u8[texelBufferIndex].mIndices[int(indexOffset*4 + i*3 + 2)])
);
}
#endif
}
33 changes: 21 additions & 12 deletions examples/skinned_meshlets/shaders/meshlet.nv.mesh
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ layout(set = 3, binding = 0) uniform samplerBuffer positionBuffers[];
layout(set = 3, binding = 2) uniform samplerBuffer normalBuffers[];
layout(set = 3, binding = 3) uniform samplerBuffer texCoordsBuffers[];
#if USE_REDIRECTED_GPU_DATA
layout(set = 3, binding = 4) uniform usamplerBuffer meshletVertexIndices[];
layout(std430, set = 3, binding = 4) buffer IndicesBuffer { uint mIndices[]; } indicesBuffers[];
#endif
layout(set = 3, binding = 5) uniform usamplerBuffer boneIndicesBuffers[];
layout(set = 3, binding = 6) uniform samplerBuffer boneWeightsBuffers[];
layout(set = 4, binding = 0) buffer MeshletsBuffer { extended_meshlet values[]; } meshletsBuffer;
layout(set = 4, binding = 0) buffer MeshletsBuffer { extended_meshlet mValues[]; } meshletsBuffer;
//-------------------------------------

//////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -104,15 +104,15 @@ void main()
uint invocationId = gl_LocalInvocationID.x;

// Get all the meshlet data:
uint vertexCount = uint(meshletsBuffer.values[meshletIndex].mGeometry.mVertexCount);
uint triangleCount = uint(meshletsBuffer.values[meshletIndex].mGeometry.mTriangleCount);
uint materialIndex = meshletsBuffer.values[meshletIndex].mMaterialIndex;
mat4 transformationMatrix = meshletsBuffer.values[meshletIndex].mTransformationMatrix;
uint vertexCount = uint(meshletsBuffer.mValues[meshletIndex].mGeometry.mVertexCount);
uint triangleCount = uint(meshletsBuffer.mValues[meshletIndex].mGeometry.mTriangleCount);
uint materialIndex = meshletsBuffer.mValues[meshletIndex].mMaterialIndex;
mat4 transformationMatrix = meshletsBuffer.mValues[meshletIndex].mTransformationMatrix;

uint modelIdx = meshletsBuffer.values[meshletIndex].mModelIndex;
uint texelBufferIndex = meshletsBuffer.values[meshletIndex].mTexelBufferIndex;
uint modelIdx = meshletsBuffer.mValues[meshletIndex].mModelIndex;
uint texelBufferIndex = meshletsBuffer.mValues[meshletIndex].mTexelBufferIndex;
#if USE_REDIRECTED_GPU_DATA
uint indexOffset = uint(meshletsBuffer.values[meshletIndex].mGeometry.mDataOffset + vertexCount);
uint indexOffset = uint(meshletsBuffer.mValues[meshletIndex].mGeometry.mDataOffset + vertexCount);
#endif

// Step by NUM_MESH_SHADER_INVOCATIONS as we have that many threads but potentially more vertices to calculate:
Expand All @@ -122,7 +122,7 @@ void main()
#if !USE_REDIRECTED_GPU_DATA
uint vi = meshletsBuffer.values[meshletIndex].mGeometry.mVertices[i];
#else
uint vi = texelFetch(meshletVertexIndices[texelBufferIndex], int(meshletsBuffer.values[meshletIndex].mGeometry.mDataOffset + i)).x;
uint vi = uint(indicesBuffers[texelBufferIndex].mIndices[int(meshletsBuffer.mValues[meshletIndex].mGeometry.mDataOffset + i)]);
#endif
// Get vertex data from the appropriate texel buffers and vertex indices:
vec4 posMshSp = vec4(texelFetch(positionBuffers[texelBufferIndex], int(vi)).xyz, 1.0);
Expand Down Expand Up @@ -185,11 +185,20 @@ void main()
// Here we have the indices packed into a uint, so we can write each of the uints at once:
uint indexGroupCount = (triangleCount * 3 + 3) / 4; // need to calculate how many packed uints we have
// Write for each packed uint (four 8-bit indices at a time), also note the NUM_MESH_SHADER_INVOCATIONS stepping
for (uint i = invocationId; i < triangleCount; i += NUM_MESH_SHADER_INVOCATIONS)
for (uint i = invocationId; i < indexGroupCount; i += NUM_MESH_SHADER_INVOCATIONS)
{
uint index = uint(texelFetch(meshletVertexIndices[texelBufferIndex], int(indexOffset + i))).x;
uint index = uint(indicesBuffers[texelBufferIndex].mIndices[int(indexOffset + i)]);
// This writes the whole uint at once:
writePackedPrimitiveIndices4x8NV(i * 4, index);
// ^ from the GLSL_NV_mesh_shader spec:
// > void writePackedPrimitiveIndices4x8NV(uint indexOffset,
// > uint packedIndices)
// > Interprets the <packedIndices> as four 8 bit unsigned int values and
// > stores them into the gl_PrimitiveIndicesNV array starting from the
// > provided <indexOffset>, which must be a multiple of four.
// > Lower bytes are stored at lower addresses in the array.
// > The write operations must not exceed the size of the
// > gl_PrimitiveIndicesNV array.
}
#endif
}
18 changes: 7 additions & 11 deletions examples/skinned_meshlets/source/skinned_meshlets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ class skinned_meshlets_app : public avk::invokee
#if USE_REDIRECTED_GPU_DATA
drawCall.mMeshletDataBuffer = avk::context().create_buffer(avk::memory_usage::device, {},
avk::vertex_buffer_meta::create_from_data(drawCallData.mMeshletData),
avk::storage_buffer_meta::create_from_data(drawCallData.mMeshletData),
avk::uniform_texel_buffer_meta::create_from_data(drawCallData.mMeshletData).describe_only_member(drawCallData.mMeshletData[0])
avk::storage_buffer_meta::create_from_data(drawCallData.mMeshletData)
);
#endif

Expand Down Expand Up @@ -184,7 +183,7 @@ class skinned_meshlets_app : public avk::invokee
mNormalBuffers.push_back(avk::context().create_buffer_view(drawCall.mNormalsBuffer));
mTexCoordsBuffers.push_back(avk::context().create_buffer_view(drawCall.mTexCoordsBuffer));
#if USE_REDIRECTED_GPU_DATA
mMeshletDataBuffers.push_back(avk::context().create_buffer_view(drawCall.mMeshletDataBuffer));
mMeshletDataBuffers.push_back(drawCall.mMeshletDataBuffer);
#endif
mBoneIndicesBuffers.push_back(avk::context().create_buffer_view(drawCall.mBoneIndicesBuffer));
mBoneWeightsBuffers.push_back(avk::context().create_buffer_view(drawCall.mBoneWeightsBuffer));
Expand Down Expand Up @@ -353,7 +352,7 @@ class skinned_meshlets_app : public avk::invokee
avk::context().record_and_submit_with_fence({
mMeshletsBuffer->fill(meshletsGeometry.data(), 0),
matCommands
}, * mQueue)->wait_until_signalled();
}, *mQueue)->wait_until_signalled();

// One for each concurrent frame
const auto concurrentFrames = avk::context().main_window()->number_of_frames_in_flight();
Expand Down Expand Up @@ -391,7 +390,6 @@ class skinned_meshlets_app : public avk::invokee
// Create our graphics mesh pipeline with the required configuration:
auto createGraphicsMeshPipeline = [this](auto taskShader, auto meshShader, uint32_t taskInvocations, uint32_t meshInvocations) {
return avk::context().create_graphics_pipeline_for(
// Specify which shaders the pipeline consists of:
// Specify which shaders the pipeline consists of:
avk::task_shader(taskShader)
.set_specialization_constant(0, taskInvocations),
Expand Down Expand Up @@ -419,7 +417,7 @@ class skinned_meshlets_app : public avk::invokee
avk::descriptor_binding(3, 2, avk::as_uniform_texel_buffer_views(mNormalBuffers)),
avk::descriptor_binding(3, 3, avk::as_uniform_texel_buffer_views(mTexCoordsBuffers)),
#if USE_REDIRECTED_GPU_DATA
avk::descriptor_binding(3, 4, avk::as_uniform_texel_buffer_views(mMeshletDataBuffers)),
avk::descriptor_binding(3, 4, avk::as_storage_buffers(mMeshletDataBuffers)),
#endif
avk::descriptor_binding(3, 5, avk::as_uniform_texel_buffer_views(mBoneIndicesBuffers)),
avk::descriptor_binding(3, 6, avk::as_uniform_texel_buffer_views(mBoneWeightsBuffers)),
Expand Down Expand Up @@ -456,7 +454,6 @@ class skinned_meshlets_app : public avk::invokee
// Add the camera to the composition (and let it handle the updates)
mQuakeCam.set_translation({ 0.0f, -1.0f, 8.0f });
mQuakeCam.set_perspective_projection(glm::radians(60.0f), avk::context().main_window()->aspect_ratio(), 0.3f, 1000.0f);
//mQuakeCam.set_orthographic_projection(-5, 5, -5, 5, 0.5, 100);
avk::current_composition()->add_element(mQuakeCam);

auto imguiManager = avk::current_composition()->element_by_type<avk::imgui_manager>();
Expand Down Expand Up @@ -613,7 +610,7 @@ class skinned_meshlets_app : public avk::invokee
descriptor_binding(3, 2, as_uniform_texel_buffer_views(mNormalBuffers)),
descriptor_binding(3, 3, as_uniform_texel_buffer_views(mTexCoordsBuffers)),
#if USE_REDIRECTED_GPU_DATA
descriptor_binding(3, 4, as_uniform_texel_buffer_views(mMeshletDataBuffers)),
descriptor_binding(3, 4, avk::as_storage_buffers(mMeshletDataBuffers)),
#endif
descriptor_binding(3, 5, as_uniform_texel_buffer_views(mBoneIndicesBuffers)),
descriptor_binding(3, 6, as_uniform_texel_buffer_views(mBoneWeightsBuffers)),
Expand All @@ -635,13 +632,12 @@ class skinned_meshlets_app : public avk::invokee
}),

mTimestampPool->write_timestamp(firstQueryIndex + 1, stage::mesh_shader),
sync::global_memory_barrier(stage::all_graphics + access::memory_write >> stage::all_commands + access::memory_read),
mPipelineStatsPool->end_query(inFlightIndex)
))
.into_command_buffer(cmdBfr)
.then_submit_to(*mQueue)
// Do not start to render before the image has become available:
.waiting_for(imageAvailableSemaphore >> avk::stage::color_attachment_output)
.waiting_for(imageAvailableSemaphore >> stage::color_attachment_output)
.submit();

mainWnd->handle_lifetime(std::move(cmdBfr));
Expand Down Expand Up @@ -674,7 +670,7 @@ class skinned_meshlets_app : public avk::invokee
std::vector<avk::buffer_view> mBoneWeightsBuffers;
std::vector<avk::buffer_view> mBoneIndicesBuffers;
#if USE_REDIRECTED_GPU_DATA
std::vector<avk::buffer_view> mMeshletDataBuffers;
std::vector<avk::buffer> mMeshletDataBuffers;
#endif

bool mHighlightMeshlets = false;
Expand Down
1 change: 1 addition & 0 deletions examples/static_meshlets/shaders/meshlet.mesh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#version 460
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_nonuniform_qualifier : require
Expand Down
1 change: 1 addition & 0 deletions examples/static_meshlets/shaders/meshlet.nv.mesh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#version 460
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#extension GL_NV_mesh_shader : require
#extension GL_EXT_nonuniform_qualifier : require
Expand Down
2 changes: 2 additions & 0 deletions examples/static_meshlets/shaders/meshlet.nv.task
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#version 460
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#extension GL_NV_mesh_shader : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_ballot : require
Expand Down
5 changes: 5 additions & 0 deletions examples/static_meshlets/shaders/meshlet.task
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#version 460
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_nonuniform_qualifier : require
#extension GL_GOOGLE_include_directive : enable
#extension GL_ARB_shader_draw_parameters : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_ballot : require

Expand Down
7 changes: 5 additions & 2 deletions examples/static_meshlets/source/static_meshlets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,14 +432,18 @@ class static_meshlets_app : public avk::invokee

mPipelineStats = mPipelineStatsPool->get_results<uint64_t, 3>(inFlightIndex, 1, vk::QueryResultFlagBits::e64);
}

auto& pipeline = mUseNvPipeline.value_or(false) ? mPipelineNv : mPipelineExt;
context().record({
mPipelineStatsPool->reset(inFlightIndex, 1),
mPipelineStatsPool->begin_query(inFlightIndex),
mTimestampPool->reset(firstQueryIndex, 2), // reset the two values relevant for the current frame in flight
mTimestampPool->write_timestamp(firstQueryIndex + 0, stage::all_commands), // measure before drawMeshTasks*

// Upload the updated view-projection matrix into the buffer for the current frame (considering that we have cConcurrentFrames-many concurrent frames):
mViewProjBuffers[inFlightIndex]->fill(glm::value_ptr(viewProjMat), 0),

sync::global_memory_barrier(stage::all_commands >> stage::all_commands, access::memory_write >> access::memory_write | access::memory_read),

command::render_pass(pipeline->renderpass_reference(), context().main_window()->current_backbuffer_reference(), {
command::bind_pipeline(pipeline.as_reference()),
command::bind_descriptors(pipeline->layout(), mDescriptorCache->get_or_create_descriptor_sets({
Expand Down Expand Up @@ -467,7 +471,6 @@ class static_meshlets_app : public avk::invokee
}),

mTimestampPool->write_timestamp(firstQueryIndex + 1, stage::mesh_shader),
sync::global_memory_barrier(stage::all_graphics + access::memory_write >> stage::all_commands + access::memory_read),
mPipelineStatsPool->end_query(inFlightIndex)
})
.into_command_buffer(cmdBfr)
Expand Down

0 comments on commit 4c6e8c9

Please sign in to comment.