Cross vendor mesh shading (#167)

..using `VK_EXT_mesh_shader`. Both examples adapted.
cg-tuwien · May 31, 2023 · cf59370 · cf59370
1 parent f34f1eb
commit cf59370
Show file tree

Hide file tree

Showing 69 changed files with 20,785 additions and 664 deletions.
diff --git a/auto_vk b/auto_vk
diff --git a/auto_vk_toolkit/include/configure_and_compose.hpp b/auto_vk_toolkit/include/configure_and_compose.hpp
diff --git a/auto_vk_toolkit/include/context_vulkan.hpp b/auto_vk_toolkit/include/context_vulkan.hpp
@@ -40,6 +40,9 @@ namespace avk
 			vk::PhysicalDeviceAccelerationStructureFeaturesKHR& aAccStructureFeatures, vk::PhysicalDeviceRayTracingPipelineFeaturesKHR& aRayTracingPipelineFeatures, vk::PhysicalDeviceRayQueryFeaturesKHR& aRayQueryFeatures
 #else
 			vk::PhysicalDeviceRayTracingFeaturesKHR aRayTracingFeatures
+#endif
+#if VK_HEADER_VERSION >= 239
+			, vk::PhysicalDeviceMeshShaderFeaturesEXT& aMeshShaderFeatures
 #endif
 		);
 
@@ -208,18 +211,22 @@ namespace avk
 		/** Setup debug report callbacks from VK_EXT_debug_report */
 		void setup_vk_debug_report_callback();
 
-		/** Returns a vector containing all elements from @ref sRequiredDeviceExtensions
-		 *  and settings::gRequiredDeviceExtensions
+		/** Returns a vector containing all extensions which are either required or optional and supported.
 		 */
-		std::vector<const char*> get_all_required_device_extensions();
+		const std::vector<const char*>& get_all_enabled_device_extensions() const;
 
 		/** Checks if the given physical device supports the shading rate image feature
 		 */
 		bool supports_shading_rate_image(const vk::PhysicalDevice& device);
-		bool supports_mesh_shader(const vk::PhysicalDevice& device);
-
 		bool shading_rate_image_extension_requested();
-		bool mesh_shader_extension_requested();
+
+#if VK_HEADER_VERSION >= 239
+		bool supports_mesh_shader_ext(const vk::PhysicalDevice& device);
+		bool is_mesh_shader_ext_requested();
+#endif
+		bool supports_mesh_shader_nv(const vk::PhysicalDevice& device);
+		bool is_mesh_shader_nv_requested();
+
 #if VK_HEADER_VERSION >= 162
 		bool ray_tracing_pipeline_extension_requested();
 		bool acceleration_structure_extension_requested();
@@ -230,11 +237,10 @@ namespace avk
 		bool ray_tracing_extension_requested();
 #endif
 
-		/** Checks whether the given physical device supports all the required extensions,
-		 *	namely those stored in @ref settings::gRequiredDeviceExtensions. 
-		 *	Returns true if it does, false otherwise.
+		/** Checks whether the given physical device supports all of the given extensions.
+		 *	@return True if the physical device supports all the extensions, false otherwise.
 		 */
-		bool supports_all_required_extensions(const vk::PhysicalDevice& device);
+		bool supports_given_extensions(const vk::PhysicalDevice& aPhysicalDevice, const std::vector<const char*>& aExtensionsInQuestion) const;
 
 		/** Pick the physical device which looks to be the most promising one */
 		void pick_physical_device();
@@ -256,6 +262,7 @@ namespace avk
 		void set_requested_vulkan11_device_features(vk::PhysicalDeviceVulkan11Features aNewValue) { mRequestedVulkan11DeviceFeatures = aNewValue; }
 
 	public:
+		static std::vector<const char*> sRequiredInstanceExtensions;
 		static std::vector<const char*> sRequiredDeviceExtensions;
 
 		// A mutex which protects the vulkan context from concurrent access from different threads
@@ -265,6 +272,7 @@ namespace avk
 		static std::mutex sConcurrentAccessMutex;
 
 		settings mSettings;
+		std::vector<const char*> mEnabledDeviceExtensions;
 
 		vk::Instance mInstance;
 		VkDebugUtilsMessengerEXT mDebugUtilsCallbackHandle;

diff --git a/auto_vk_toolkit/include/meshlet_helpers.hpp b/auto_vk_toolkit/include/meshlet_helpers.hpp
@@ -236,14 +236,23 @@ namespace avk
 
 	/** Converts meshlets into a GPU usable representation.
 	 *	@param	aMeshlets	The meshlets to convert
-	 *	@tparam	T			Either meshlet_gpu_data or meshlet_indirect_gpu_data. If the indirect representation is used, the meshlet data will also be returned.
-	 *						The meshlet data contains the vertex indices from [mDataOffset] to [mDataOffset + mVertexCount].
-	 *						It also contains the indices into the vertex indices, four uint8 packed into a single uint32,
-	 *						from [mDataOffset + mVertexCount] to [mDataOffset + mVertexCount + (mIndexCount+3)/4]
+	 *	@tparam	T			Either meshlet_gpu_data or meshlet_redirected_gpu_data.
+	 *	                    - meshlet_gpu_data => The output will be one vector of meshlet_gpu_data elements, and the
+	 *	                      second tuple element will be empty.
+	 *	                    - meshlet_redirected_gpu_data => Two vectors are returned: Firstly, a vector of meshlet_redirected_gpu_data elements
+	 *						  which contains index offsets, and for the second tuple element a vector of meshlet indices is returned.
+	 *						  Attention: The vector of indices contains two different regions:
+	 *						   1) The meshlet data contains the vertex indices from [mDataOffset .. mDataOffset+mVertexCount).
+	 *						   2) The indices into the vertex indices, where groups of four uint8_t values are packed into a single uint32_t value.
+	 *						      Region 2 is stored in [mDataOffset+mVertexCount .. mDataOffset+mVertexCount+(mIndexCount+3)/4)
+	 *                      The advantage of the non-redirected representation is easier handling, while the index data must be copied.
+	 *                      The advantage of the redirected representation can be more compressed data, while there is another indirection.
 	 *  @tparam NV			The number of vertices
 	 *  @tparam NI			The number of indices
-	 *  @returns			A Tuple of the converted meshlets into the provided type and the optional meshlet data when the indirect representation
-	 *						is used.
+	 *  @returns			A Tuple of the following structure:
+	 *                      <0>: The input meshlets, converted into the provided output meshlet type.
+	 *                           If T is meshlet_redirected_gpu_data, it will contain offsets into the second tuple element:
+	 *                      <1>: Meshlet indices data, if the redirected representation is used. (For more details, see description of T.)
 	 */
 	template <typename T, size_t NV, size_t NI>
 	std::tuple<std::vector<T>, std::optional<std::vector<uint32_t>>> convert_for_gpu_usage(const std::vector<meshlet>& aMeshlets)
@@ -285,13 +294,21 @@ namespace avk
 
 	/** Converts meshlets into a GPU usable representation.
 	 *	@param	aMeshlets	The meshlets to convert
-	 *	@tparam	T			Either meshlet_gpu_data or meshlet_indirect_gpu_data. If the indirect representation is used, the meshlet data will also be returned.
-	 *						The meshlet data contains the vertex indices from [mDataOffset] to [mDataOffset + mVertexCount].
-	 *						It also contains the indices into the vertex indices, four uint8 packed into a single uint32,
-	 *						from [mDataOffset + mVertexCount] to [mDataOffset + mVertexCount + (mIndexCount+3)/4]
-	 *						T must provide static members ::sNumVertices and ::sNumIndices
-	 *  @returns			A Tuple of the converted meshlets into the provided type and the optional meshlet data when the indirect representation
-	 *						is used.
+	 *	@tparam	T			Either meshlet_gpu_data or meshlet_redirected_gpu_data.
+	 *	                    - meshlet_gpu_data => The output will be one vector of meshlet_gpu_data elements, and the
+	 *	                      second tuple element will be empty.
+	 *	                    - meshlet_redirected_gpu_data => Two vectors are returned: Firstly, a vector of meshlet_redirected_gpu_data elements
+	 *						  which contains index offsets, and for the second tuple element a vector of meshlet indices is returned.
+	 *						  Attention: The vector of indices contains two different regions:
+	 *						   1) The meshlet data contains the vertex indices from [mDataOffset .. mDataOffset+mVertexCount).
+	 *						   2) The indices into the vertex indices, where groups of four uint8_t values are packed into a single uint32_t value.
+	 *						      Region 2 is stored in [mDataOffset+mVertexCount .. mDataOffset+mVertexCount+(mIndexCount+3)/4)
+	 *                      The advantage of the non-redirected representation is easier handling, while the index data must be copied.
+	 *                      The advantage of the redirected representation can be more compressed data, while there is another indirection.
+	 *  @returns			A Tuple of the following structure:
+	 *                      <0>: The input meshlets, converted into the provided output meshlet type.
+	 *                           If T is meshlet_redirected_gpu_data, it will contain offsets into the second tuple element:
+	 *                      <1>: Meshlet indices data, if the redirected representation is used. (For more details, see description of T.)
 	 */
 	template <typename T> requires has_static_num_vertices_and_num_indices<T>
 	std::tuple<std::vector<T>, std::optional<std::vector<uint32_t>>> convert_for_gpu_usage(const std::vector<meshlet>& aMeshlets)
@@ -302,14 +319,23 @@ namespace avk
 	/** Converts meshlets into a GPU usable representation.
 	 *  @param  aSerializer The serializer for the meshlet gpu data.
 	 *	@param	aMeshlets	The meshlets to convert
-	 *	@tparam	T			Either meshlet_gpu_data or meshlet_indirect_gpu_data. If the indirect representation is used, the meshlet data will also be returned.
-	 *						The meshlet data contains the vertex indices from [mDataOffset] to [mDataOffset + mVertexCount].
-	 *						It also contains the indices into the vertex indices, four uint8 packed into a single uint32,
-	 *						from [mDataOffset + mVertexCount] to [mDataOffset + mVertexCount + (mIndexCount+3)/4]
+	 *	@tparam	T			Either meshlet_gpu_data or meshlet_redirected_gpu_data.
+	 *	                    - meshlet_gpu_data => The output will be one vector of meshlet_gpu_data elements, and the
+	 *	                      second tuple element will be empty.
+	 *	                    - meshlet_redirected_gpu_data => Two vectors are returned: Firstly, a vector of meshlet_redirected_gpu_data elements
+	 *						  which contains index offsets, and for the second tuple element a vector of meshlet indices is returned.
+	 *						  Attention: The vector of indices contains two different regions:
+	 *						   1) The meshlet data contains the vertex indices from [mDataOffset .. mDataOffset+mVertexCount).
+	 *						   2) The indices into the vertex indices, where groups of four uint8_t values are packed into a single uint32_t value.
+	 *						      Region 2 is stored in [mDataOffset+mVertexCount .. mDataOffset+mVertexCount+(mIndexCount+3)/4)
+	 *                      The advantage of the non-redirected representation is easier handling, while the index data must be copied.
+	 *                      The advantage of the redirected representation can be more compressed data, while there is another indirection.
 	 *  @tparam NV			The number of vertices
-	 *	@tparam NI			The number of indices
-	 *  @returns			A Tuple of the converted meshlets into the provided type and the optional meshlet data when the indirect representation
-	 *						is used.
+	 *  @tparam NI			The number of indices
+	 *  @returns			A Tuple of the following structure:
+	 *                      <0>: The input meshlets, converted into the provided output meshlet type.
+	 *                           If T is meshlet_redirected_gpu_data, it will contain offsets into the second tuple element:
+	 *                      <1>: Meshlet indices data, if the redirected representation is used. (For more details, see description of T.)
 	 */
 	template <typename T, size_t NV, size_t NI>
 	std::tuple<std::vector<T>, std::optional<std::vector<uint32_t>>> convert_for_gpu_usage_cached(serializer& aSerializer, const std::vector<meshlet>& aMeshlets)
@@ -341,13 +367,21 @@ namespace avk
 	/** Converts meshlets into a GPU usable representation.
 	 *  @param  aSerializer The serializer for the meshlet gpu data.
 	 *	@param	aMeshlets	The meshlets to convert
-	 *	@tparam	T			Either meshlet_gpu_data or meshlet_indirect_gpu_data. If the indirect representation is used, the meshlet data will also be returned.
-	 *						The meshlet data contains the vertex indices from [mDataOffset] to [mDataOffset + mVertexCount].
-	 *						It also contains the indices into the vertex indices, four uint8 packed into a single uint32,
-	 *						from [mDataOffset + mVertexCount] to [mDataOffset + mVertexCount + (mIndexCount+3)/4].
-	 *						T must provide static members ::sNumVertices and ::sNumIndices
-	 *  @returns			A Tuple of the converted meshlets into the provided type and the optional meshlet data when the indirect representation
-	 *						is used.
+	 *	@tparam	T			Either meshlet_gpu_data or meshlet_redirected_gpu_data.
+	 *	                    - meshlet_gpu_data => The output will be one vector of meshlet_gpu_data elements, and the
+	 *	                      second tuple element will be empty.
+	 *	                    - meshlet_redirected_gpu_data => Two vectors are returned: Firstly, a vector of meshlet_redirected_gpu_data elements
+	 *						  which contains index offsets, and for the second tuple element a vector of meshlet indices is returned.
+	 *						  Attention: The vector of indices contains two different regions:
+	 *						   1) The meshlet data contains the vertex indices from [mDataOffset .. mDataOffset+mVertexCount).
+	 *						   2) The indices into the vertex indices, where groups of four uint8_t values are packed into a single uint32_t value.
+	 *						      Region 2 is stored in [mDataOffset+mVertexCount .. mDataOffset+mVertexCount+(mIndexCount+3)/4)
+	 *						The advantage of the non-redirected representation is easier handling, while the index data must be copied.
+	 *                      The advantage of the redirected representation can be more compressed data, while there is another indirection.
+	 *  @returns			A Tuple of the following structure:
+	 *                      <0>: The input meshlets, converted into the provided output meshlet type.
+	 *                           If T is meshlet_redirected_gpu_data, it will contain offsets into the second tuple element:
+	 *                      <1>: Meshlet indices data, if the redirected representation is used. (For more details, see description of T.)
 	 */
 	template <typename T> requires has_static_num_vertices_and_num_indices<T>
 	std::tuple<std::vector<T>, std::optional<std::vector<uint32_t>>> convert_for_gpu_usage_cached(serializer& aSerializer, const std::vector<meshlet>& aMeshlets)

diff --git a/auto_vk_toolkit/include/settings.hpp b/auto_vk_toolkit/include/settings.hpp
@@ -157,6 +157,26 @@ namespace avk
 		std::vector<const char*> mExtensions;
 	};
 
+	/** Settings struct that collects the names of optional device extensions in its mExtensions vector. NOTE(review): presumably these are enabled only if the selected physical device supports them -- confirm in the context implementation. */
+	struct optional_device_extensions
+	{
+		optional_device_extensions(const char* aExtensionName = nullptr) // Not explicit: a single extension name converts implicitly; the nullptr default yields an empty list.
+		{
+			if (nullptr != aExtensionName) { // Skip the nullptr default so that this constructor never stores a null entry.
+				mExtensions.push_back(aExtensionName);
+			}
+		}
+
+		optional_device_extensions& add_extension(const char* aExtensionName) // Appends one extension name and returns *this so that calls can be chained.
+		{
+			assert(nullptr != aExtensionName); // Unlike in the constructor, nullptr is treated as a usage error here (caught only while the assert macro is active).
+			mExtensions.push_back(aExtensionName);
+			return *this;
+		}
+
+		std::vector<const char*> mExtensions; // Holds the raw pointers only; the strings are not copied, so they must stay valid for as long as this list is used.
+	};
+
 	/** Pass a function to modify the requested physical device features.
 	 *	@typedef	F		void(vk::PhysicalDeviceFeatures&)
 	 *						Modify the values of the passed reference to vk::PhysicalDeviceFeatures& directly!
@@ -204,6 +224,7 @@ namespace avk
 		required_instance_extensions mRequiredInstanceExtensions;
 		validation_layers mValidationLayers;
 		required_device_extensions mRequiredDeviceExtensions;
+		optional_device_extensions mOptionalDeviceExtensions;
 		vk::DebugUtilsMessageSeverityFlagsEXT mEnabledDebugUtilsMessageSeverities = vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose;
 		vk::DebugUtilsMessageTypeFlagsEXT mEnabledDebugUtilsMessageTypes = vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance;
 	};