diff --git a/CHANGELOG.md b/CHANGELOG.md index c7bf265f77..d540443a35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,9 @@ Bottom level categories: - BREAKING: Migrated from the `maxInterStageShaderComponents` limit to `maxInterStageShaderVariables`, which changes validation in a way that should not affect most programs. This follows the latest changes of the WebGPU spec. By @ErichDonGubler in [#8652](https://github.com/gfx-rs/wgpu/pull/8652), [#8792](https://github.com/gfx-rs/wgpu/pull/8792). - Fixed validation of the texture format in GPUDepthStencilState when neither depth nor stencil is actually enabled. By @andyleiserson in [#8766](https://github.com/gfx-rs/wgpu/pull/8766). +#### Vulkan +- Fixed a variety of mesh shader SPIR-V writer issues from the original implementation. By @inner-daemons in [#8756](https://github.com/gfx-rs/wgpu/pull/8756) + #### GLES - `DisplayHandle` should now be passed to `InstanceDescriptor` for correct EGL initialization on Wayland. By @MarijnS95 in [#8012](https://github.com/gfx-rs/wgpu/pull/8012) diff --git a/docs/api-specs/mesh_shading.md b/docs/api-specs/mesh_shading.md index c2b0a5202a..7cc6fea0a2 100644 --- a/docs/api-specs/mesh_shading.md +++ b/docs/api-specs/mesh_shading.md @@ -130,13 +130,11 @@ A task shader entry point must have a `@workgroup_size` attribute, meeting the s A task shader entry point must also have a `@payload(G)` property, where `G` is the name of a global variable in the `task_payload` address space. Each task shader workgroup has its own instance of this variable, visible to all invocations in the workgroup. Whatever value the workgroup collectively stores in that global variable becomes the **task payload**, and is provided to all invocations in the mesh shader grid dispatched for the workgroup. A task payload variable must be at least 4 bytes in size. -A task shader entry point must return a `vec3` value. The return value of each workgroup's first invocation (that is, the one whose `local_invocation_index` is `0`) is taken as the size of a **mesh shader grid** to dispatch, measured in workgroups. (If the task shader entry point returns `vec3(0, 0, 0)`, then no mesh shaders are dispatched.) Mesh shader grids are described in the next section. +A task shader entry point must return a `vec3` value decorated with `@builtin(mesh_task_size)`. The return value of each workgroup's first invocation (that is, the one whose `local_invocation_index` is `0`) is taken as the size of a **mesh shader grid** to dispatch, measured in workgroups. (If the task shader entry point returns `vec3(0, 0, 0)`, then no mesh shaders are dispatched.) Mesh shader grids are described in the next section. Each task shader workgroup dispatches an independent mesh shader grid: in mesh shader invocations, `@builtin` values like `workgroup_id` and `global_invocation_id` describe the position of the workgroup and invocation within that grid; and `@builtin(num_workgroups)` matches the task shader workgroup's return value. Mesh shaders dispatched for other task shader workgroups are not included in the count. If it is necessary for a mesh shader to know which task shader workgroup dispatched it, the task shader can include its own workgroup id in the task payload. -Task shaders must return a value of type `vec3` decorated with `@builtin(mesh_task_size)`. - Task shaders can use compute and subgroup builtin inputs, in addition to `view_index` and `draw_id`. ### Mesh shader diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index 418e510e1b..e492eeee93 100644 --- a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -260,11 +260,12 @@ impl ExampleContext { async fn init_async(surface: &mut SurfaceWrapper, window: Arc) -> Self { log::info!("Initializing wgpu..."); - let instance_descriptor = wgpu::InstanceDescriptor::from_env_or_default() + let mut instance_descriptor = wgpu::InstanceDescriptor::from_env_or_default() .with_display_handle(Box::new( // TODO: Use event_loop.owned_display_handle() with winit 0.30 window.clone(), )); + instance_descriptor.backend_options.dx12.shader_compiler = wgpu::Dx12Compiler::StaticDxc; let instance = wgpu::Instance::new(instance_descriptor); surface.pre_adapter(&instance, window); let adapter = get_adapter_with_capabilities_or_from_env( diff --git a/examples/features/src/mesh_shader/mod.rs b/examples/features/src/mesh_shader/mod.rs index 20f069e9a8..70aacec190 100644 --- a/examples/features/src/mesh_shader/mod.rs +++ b/examples/features/src/mesh_shader/mod.rs @@ -1,9 +1,15 @@ // Same as in mesh shader tests fn compile_wgsl(device: &wgpu::Device) -> wgpu::ShaderModule { - device.create_shader_module(wgpu::ShaderModuleDescriptor { - label: None, - source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), - }) + // Workgroup memory zero initialization can be expensive for mesh shaders + unsafe { + device.create_shader_module_trusted( + wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), + }, + wgpu::ShaderRuntimeChecks::unchecked(), + ) + } } fn compile_hlsl(device: &wgpu::Device, entry: &str, stage_str: &str) -> wgpu::ShaderModule { let out_path = format!( diff --git a/examples/features/src/mesh_shader/shader.wgsl b/examples/features/src/mesh_shader/shader.wgsl index e0d03ea13d..78732a6841 100644 --- a/examples/features/src/mesh_shader/shader.wgsl +++ b/examples/features/src/mesh_shader/shader.wgsl @@ -33,12 +33,15 @@ var workgroupData: f32; @task @payload(taskPayload) -@workgroup_size(1) -fn ts_main() -> @builtin(mesh_task_size) vec3 { - workgroupData = 1.0; - taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; - return vec3(1, 1, 1); +@workgroup_size(64) +fn ts_main(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { + workgroupData = 1.0; + taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); + taskPayload.visible = true; + return vec3(1, 1, 1); + } + return vec3(0, 0, 0); } struct MeshOutput { @@ -52,24 +55,27 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) -fn ms_main() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; +@workgroup_size(64) +fn ms_main(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { + mesh_output.vertex_count = 3; + mesh_output.primitive_count = 1; + workgroupData = 2.0; - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; + mesh_output.vertices[0].position = positions[0]; + mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; + mesh_output.vertices[1].position = positions[1]; + mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; + mesh_output.vertices[2].position = positions[2]; + mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + mesh_output.primitives[0].indices = vec3(0, 1, 2); + mesh_output.primitives[0].cull = !taskPayload.visible; + mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + return; + } } @fragment diff --git a/naga-test/src/lib.rs b/naga-test/src/lib.rs index 51db424ff7..3551b63c85 100644 --- a/naga-test/src/lib.rs +++ b/naga-test/src/lib.rs @@ -164,6 +164,10 @@ impl SpirvOutParameters { ray_query_initialization_tracking: true, debug_info, use_storage_input_output_16: self.use_storage_input_output_16, + task_runtime_limits: Some(spv::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: 256, + max_mesh_workgroups_total: 1024, + }), } } } diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs index 91fca9e42b..d4599e9e11 100644 --- a/naga/src/back/mod.rs +++ b/naga/src/back/mod.rs @@ -128,6 +128,7 @@ fn get_entry_points( /// [`EntryPoint`]: crate::EntryPoint /// [`Module`]: crate::Module /// [`Module::entry_points`]: crate::Module::entry_points +#[derive(Clone, Copy, Debug)] pub enum FunctionType { /// A regular function. Function(crate::Handle), diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index a2d2f9f822..6e87d0f39d 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -222,12 +222,11 @@ impl Writer { ir_result: &crate::FunctionResult, result_members: &[ResultMember], body: &mut Vec, - task_payload: Option, ) -> Result { for (index, res_member) in result_members.iter().enumerate() { // This isn't a real builtin, and is handled elsewhere if res_member.built_in == Some(crate::BuiltIn::MeshTaskSize) { - continue; + return Ok(Instruction::return_value(value_id)); } let member_value_id = match ir_result.binding { Some(_) => value_id, @@ -259,13 +258,7 @@ impl Writer { _ => {} } } - self.try_write_entry_point_task_return( - value_id, - ir_result, - result_members, - body, - task_payload, - ) + Ok(Instruction::return_void()) } } @@ -3342,7 +3335,6 @@ impl BlockContext<'_> { self.ir_function.result.as_ref().unwrap(), &context.results, &mut block.body, - context.task_payload_variable_id, )?, None => Instruction::return_value(value_id), }; @@ -3350,18 +3342,7 @@ impl BlockContext<'_> { return Ok(BlockExitDisposition::Discarded); } Statement::Return { value: None } => { - if let Some(super::EntryPointContext { - mesh_state: Some(ref mesh_state), - .. - }) = self.function.entry_point_context - { - self.function.consume( - block, - Instruction::branch(mesh_state.entry_point_epilogue_id), - ); - } else { - self.function.consume(block, Instruction::return_void()); - } + self.function.consume(block, Instruction::return_void()); return Ok(BlockExitDisposition::Discarded); } Statement::Kill => { @@ -3857,16 +3838,6 @@ impl BlockContext<'_> { LoopContext::default(), debug_info, )?; - if let Some(super::EntryPointContext { - mesh_state: Some(ref mesh_state), - .. - }) = self.function.entry_point_context - { - let mut block = Block::new(mesh_state.entry_point_epilogue_id); - self.writer - .write_mesh_shader_return(mesh_state, &mut block)?; - self.function.consume(block, Instruction::return_void()); - } Ok(()) } diff --git a/naga/src/back/spv/mesh_shader.rs b/naga/src/back/spv/mesh_shader.rs index a026c763af..d0c08895aa 100644 --- a/naga/src/back/spv/mesh_shader.rs +++ b/naga/src/back/spv/mesh_shader.rs @@ -4,7 +4,7 @@ use spirv::Word; use crate::{ back::spv::{ helpers::BindingDecorations, writer::FunctionInterface, Block, EntryPointContext, Error, - Instruction, ResultMember, WriterFlags, + Instruction, WriterFlags, }, non_max_u32::NonMaxU32, Handle, @@ -36,15 +36,9 @@ pub struct MeshReturnInfo { /// All members of the output variable struct type out_members: Vec, /// Id of the input variable for local invocation id - local_invocation_index_id: Word, + local_invocation_index_var_id: Word, /// Total workgroup size (product) workgroup_size: u32, - /// Variable to be used later when saving the output as a loop index - loop_counter_vertices: Word, - /// Variable to be used later when saving the output as a loop index - loop_counter_primitives: Word, - /// The id of the label to jump to when `return` is called - pub entry_point_epilogue_id: Word, /// Vertex-specific info vertex_info: PerOutputTypeMeshReturnInfo, @@ -55,16 +49,6 @@ pub struct MeshReturnInfo { } impl super::Writer { - pub(super) fn require_mesh_shaders(&mut self) -> Result<(), Error> { - self.use_extension("SPV_EXT_mesh_shader"); - self.require_any("Mesh Shaders", &[spirv::Capability::MeshShadingEXT])?; - let lang_version = self.lang_version(); - if lang_version.0 <= 1 && lang_version.1 < 4 { - return Err(Error::SpirvVersionTooLow(1, 4)); - } - Ok(()) - } - /// Sets up an output variable that will handle part of the mesh shader output pub(super) fn write_mesh_return_global_variable( &mut self, @@ -88,7 +72,6 @@ impl super::Writer { iface: &mut FunctionInterface, local_invocation_index_id: Option, ir_module: &crate::Module, - prelude: &mut Block, ep_context: &mut EntryPointContext, ) -> Result<(), Error> { let Some(ref mesh_info) = iface.mesh_info else { @@ -146,7 +129,7 @@ impl super::Writer { _ => unreachable!(), }; // In the final return, we do a giant memcpy, for which this is helpful - let local_invocation_index_id = match local_invocation_index_id { + let local_invocation_index_var_id = match local_invocation_index_id { Some(a) => a, None => { let u32_id = self.get_u32_type_id(); @@ -166,48 +149,17 @@ impl super::Writer { .to_words(&mut self.logical_layout.annotations); iface.varying_ids.push(var); - let loaded_value = self.id_gen.next(); - prelude - .body - .push(Instruction::load(u32_id, loaded_value, var, None)); - loaded_value + var } }; - let u32_id = self.get_u32_type_id(); - // A general function variable that we guarantee to allow in the final return. It must be - // declared at the top of the function. Currently it is used in the memcpy part to keep - // track of the current index to copy. - let loop_counter_1 = self.id_gen.next(); - let loop_counter_2 = self.id_gen.next(); - prelude.body.insert( - 0, - Instruction::variable( - self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), - loop_counter_1, - spirv::StorageClass::Function, - None, - ), - ); - prelude.body.insert( - 1, - Instruction::variable( - self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), - loop_counter_2, - spirv::StorageClass::Function, - None, - ), - ); // This is the information that is passed to the function writer // so that it can write the final return logic let mut mesh_return_info = MeshReturnInfo { out_variable_id: self.global_variables[mesh_info.output_variable].var_id, out_members, - local_invocation_index_id, + local_invocation_index_var_id, workgroup_size: self .get_constant_scalar(crate::Literal::U32(iface.workgroup_size.iter().product())), - loop_counter_vertices: loop_counter_1, - loop_counter_primitives: loop_counter_2, - entry_point_epilogue_id: self.id_gen.next(), vertex_info: PerOutputTypeMeshReturnInfo { array_type_id: vertex_array_type_id, @@ -457,60 +409,33 @@ impl super::Writer { Ok(()) } - pub(super) fn try_write_entry_point_task_return( + pub(super) fn write_entry_point_task_return( &mut self, value_id: Word, - ir_result: &crate::FunctionResult, - result_members: &[ResultMember], body: &mut Vec, - task_payload: Option, + task_payload: Word, ) -> Result { // OpEmitMeshTasksEXT must be called right before exiting (after setting other // output variables if there are any) - for (index, res_member) in result_members.iter().enumerate() { - if res_member.built_in == Some(crate::BuiltIn::MeshTaskSize) { - self.write_control_barrier(crate::Barrier::WORK_GROUP, body); - // If its a function like `fn a() -> @builtin(...) vec3 ...` - // then just use the output value. If it's a struct, extract the - // value from the struct. - let member_value_id = match ir_result.binding { - Some(_) => value_id, - None => { - let member_value_id = self.id_gen.next(); - body.push(Instruction::composite_extract( - res_member.type_id, - member_value_id, - value_id, - &[index as Word], - )); - member_value_id - } - }; - // Extract the vec3 into 3 u32's - let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; - for (i, &value) in values.iter().enumerate() { - let instruction = Instruction::composite_extract( - self.get_u32_type_id(), - value, - member_value_id, - &[i as Word], - ); - body.push(instruction); - } - // TODO: make this guaranteed to be uniform - let mut instruction = Instruction::new(spirv::Op::EmitMeshTasksEXT); - for id in values { - instruction.add_operand(id); - } - // We have to include the task payload in our call - if let Some(task_payload) = task_payload { - instruction.add_operand(task_payload); - } - return Ok(instruction); - } + // Extract the vec3 into 3 u32's + let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, &value) in values.iter().enumerate() { + let instruction = Instruction::composite_extract( + self.get_u32_type_id(), + value, + value_id, + &[i as Word], + ); + body.push(instruction); + } + let mut instruction = Instruction::new(spirv::Op::EmitMeshTasksEXT); + for id in values { + instruction.add_operand(id); } - Ok(Instruction::return_void()) + // We have to include the task payload in our call + instruction.add_operand(task_payload); + Ok(instruction) } /// This writes the actual loop @@ -708,9 +633,10 @@ impl super::Writer { &mut self, return_info: &MeshReturnInfo, block: &mut Block, + loop_counter_vertices: u32, + loop_counter_primitives: u32, + local_invocation_index_id: Word, ) -> Result<(), Error> { - // Start with a control barrier so that everything that follows is guaranteed to see the same variables - self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); let u32_id = self.get_u32_type_id(); // Load the actual vertex and primitive counts @@ -780,8 +706,6 @@ impl super::Writer { return_info.primitive_info.array_type_id, ); - self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); - // This must be called exactly once before any other mesh outputs are written { let mut ins = Instruction::new(spirv::Op::SetMeshOutputsEXT); @@ -798,8 +722,8 @@ impl super::Writer { let func_end = self.id_gen.next(); block.body.push(Instruction::store( - return_info.loop_counter_vertices, - return_info.local_invocation_index_id, + loop_counter_vertices, + local_invocation_index_id, None, )); block.body.push(Instruction::branch(vertex_loop_header)); @@ -807,7 +731,7 @@ impl super::Writer { let vertex_copy_body = self.write_mesh_copy_body( false, return_info, - return_info.loop_counter_vertices, + loop_counter_vertices, vert_array_ptr, prim_array_ptr, ); @@ -818,7 +742,7 @@ impl super::Writer { vertex_loop_header, in_between_loops, vert_count_id, - return_info.loop_counter_vertices, + loop_counter_vertices, return_info, ); @@ -827,8 +751,8 @@ impl super::Writer { block.body.push(Instruction::label(in_between_loops)); block.body.push(Instruction::store( - return_info.loop_counter_primitives, - return_info.local_invocation_index_id, + loop_counter_primitives, + local_invocation_index_id, None, )); @@ -837,7 +761,7 @@ impl super::Writer { let primitive_copy_body = self.write_mesh_copy_body( true, return_info, - return_info.loop_counter_primitives, + loop_counter_primitives, vert_array_ptr, prim_array_ptr, ); @@ -848,11 +772,211 @@ impl super::Writer { prim_loop_header, func_end, prim_count_id, - return_info.loop_counter_primitives, + loop_counter_primitives, return_info, ); block.body.push(Instruction::label(func_end)); Ok(()) } + + pub(super) fn write_mesh_shader_wrapper( + &mut self, + return_info: &MeshReturnInfo, + inner_id: u32, + ) -> Result { + let out_id = self.id_gen.next(); + let mut function = super::Function::default(); + let lookup_function_type = super::LookupFunctionType { + parameter_type_ids: alloc::vec![], + return_type_id: self.void_type, + }; + let function_type = self.get_function_type(lookup_function_type); + function.signature = Some(Instruction::function( + self.void_type, + out_id, + spirv::FunctionControl::empty(), + function_type, + )); + let u32_id = self.get_u32_type_id(); + { + let mut block = Block::new(self.id_gen.next()); + // A general function variable that we guarantee to allow in the final return. It must be + // declared at the top of the function. Currently it is used in the memcpy part to keep + // track of the current index to copy. + let loop_counter_vertices = self.id_gen.next(); + let loop_counter_primitives = self.id_gen.next(); + block.body.insert( + 0, + Instruction::variable( + self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), + loop_counter_vertices, + spirv::StorageClass::Function, + None, + ), + ); + block.body.insert( + 1, + Instruction::variable( + self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), + loop_counter_primitives, + spirv::StorageClass::Function, + None, + ), + ); + let local_invocation_index_id = self.id_gen.next(); + block.body.push(Instruction::load( + u32_id, + local_invocation_index_id, + return_info.local_invocation_index_var_id, + None, + )); + block.body.push(Instruction::function_call( + self.void_type, + self.id_gen.next(), + inner_id, + &[], + )); + self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); + self.write_mesh_shader_return( + return_info, + &mut block, + loop_counter_vertices, + loop_counter_primitives, + local_invocation_index_id, + )?; + function.consume(block, Instruction::return_void()); + } + function.to_words(&mut self.logical_layout.function_definitions); + Ok(out_id) + } + + pub(super) fn write_task_shader_wrapper( + &mut self, + task_payload: Word, + inner_id: u32, + ) -> Result { + let out_id = self.id_gen.next(); + let mut function = super::Function::default(); + let lookup_function_type = super::LookupFunctionType { + parameter_type_ids: alloc::vec![], + return_type_id: self.void_type, + }; + let function_type = self.get_function_type(lookup_function_type); + function.signature = Some(Instruction::function( + self.void_type, + out_id, + spirv::FunctionControl::empty(), + function_type, + )); + + { + let mut block = Block::new(self.id_gen.next()); + let result = self.id_gen.next(); + block.body.push(Instruction::function_call( + self.get_vec3u_type_id(), + result, + inner_id, + &[], + )); + self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); + let final_value = if let Some(task_limits) = self.task_runtime_limits { + let zero_u32 = self.get_constant_scalar(crate::Literal::U32(0)); + // If its greater than 2<<21 then overflow is possible without being caught + let max_per_dim = self.get_constant_scalar(crate::Literal::U64( + task_limits.max_mesh_workgroups_per_dim.min(2 << 21) as u64, + )); + let max_total = self.get_constant_scalar(crate::Literal::U64( + task_limits.max_mesh_workgroups_total as u64, + )); + let u64_type_id = self + .get_numeric_type_id(crate::back::spv::NumericType::Scalar(crate::Scalar::U64)); + let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, value) in values.into_iter().enumerate() { + let u32_val = self.id_gen.next(); + block.body.push(Instruction::composite_extract( + self.get_u32_type_id(), + u32_val, + result, + &[i as u32], + )); + block.body.push(Instruction::unary( + spirv::Op::UConvert, + u64_type_id, + value, + u32_val, + )); + } + let prod_1 = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IMul, + u64_type_id, + prod_1, + values[0], + values[1], + )); + let prod = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IMul, + u64_type_id, + prod, + prod_1, + values[2], + )); + let total_too_large = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::UGreaterThanEqual, + self.get_bool_type_id(), + total_too_large, + prod, + max_total, + )); + + let too_large = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, value) in values.into_iter().enumerate() { + block.body.push(Instruction::binary( + spirv::Op::UGreaterThanEqual, + self.get_bool_type_id(), + too_large[i], + value, + max_per_dim, + )); + } + let mut current = total_too_large; + for is_too_large in too_large { + let new = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::LogicalOr, + self.get_bool_type_id(), + new, + current, + is_too_large, + )); + current = new; + } + let zero_vec3 = self.id_gen.next(); + block.body.push(Instruction::composite_construct( + self.get_vec3u_type_id(), + zero_vec3, + &[zero_u32, zero_u32, zero_u32], + )); + let final_result = self.id_gen.next(); + block.body.push(Instruction::select( + self.get_vec3u_type_id(), + final_result, + current, + zero_vec3, + result, + )); + final_result + } else { + result + }; + let ins = + self.write_entry_point_task_return(final_value, &mut block.body, task_payload)?; + function.consume(block, ins); + } + function.to_words(&mut self.logical_layout.function_definitions); + Ok(out_id) + } } diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 9b8fbe618b..c798ba50b7 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -854,6 +854,8 @@ pub struct Writer { /// Non semantic debug printf extension `OpExtInstImport` debug_printf: Option, pub(crate) ray_query_initialization_tracking: bool, + + task_runtime_limits: Option, } bitflags::bitflags! { @@ -930,6 +932,12 @@ pub enum ZeroInitializeWorkgroupMemoryMode { None, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct TaskRuntimeLimits { + pub max_mesh_workgroups_per_dim: u32, + pub max_mesh_workgroups_total: u32, +} + #[derive(Debug, Clone)] pub struct Options<'a> { /// (Major, Minor) target version of the SPIR-V. @@ -971,6 +979,8 @@ pub struct Options<'a> { pub use_storage_input_output_16: bool, pub debug_info: Option>, + + pub task_runtime_limits: Option, } impl Default for Options<'_> { @@ -993,6 +1003,7 @@ impl Default for Options<'_> { ray_query_initialization_tracking: true, use_storage_input_output_16: true, debug_info: None, + task_runtime_limits: None, } } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 3e607dcf77..826d58a472 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -109,6 +109,7 @@ impl Writer { options.use_storage_input_output_16, ), debug_printf: None, + task_runtime_limits: options.task_runtime_limits, }) } @@ -127,6 +128,7 @@ impl Writer { self.binding_map = options.binding_map.clone(); self.io_f16_polyfills = super::f16_polyfill::F16IoPolyfill::new(options.use_storage_input_output_16); + self.task_runtime_limits = options.task_runtime_limits; Ok(()) } @@ -165,6 +167,7 @@ impl Writer { capabilities_available: take(&mut self.capabilities_available), fake_missing_bindings: self.fake_missing_bindings, binding_map: take(&mut self.binding_map), + task_runtime_limits: self.task_runtime_limits, // Initialized afresh: id_gen, @@ -402,14 +405,6 @@ impl Writer { self.get_pointer_type_id(vec2u_id, class) } - pub(super) fn get_vec3u_pointer_type_id(&mut self, class: spirv::StorageClass) -> Word { - let vec3u_id = self.get_numeric_type_id(NumericType::Vector { - size: crate::VectorSize::Tri, - scalar: crate::Scalar::U32, - }); - self.get_pointer_type_id(vec3u_id, class) - } - pub(super) fn get_bool_type_id(&mut self) -> Word { self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::BOOL)) } @@ -770,10 +765,9 @@ impl Writer { mesh_state: None, }; - let mut local_invocation_id = None; - let mut parameter_type_ids = Vec::with_capacity(ir_function.arguments.len()); + let mut local_invocation_index_var_id = None; let mut local_invocation_index_id = None; for argument in ir_function.arguments.iter() { @@ -803,13 +797,9 @@ impl Writer { varying_id, argument_type_id, ); - - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); - } else if binding - == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) - { + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { local_invocation_index_id = Some(id); + local_invocation_index_var_id = Some(varying_id); } id @@ -834,13 +824,10 @@ impl Writer { let id = self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id); constituent_ids.push(id); - - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); - } else if binding - == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { local_invocation_index_id = Some(id); + local_invocation_index_var_id = Some(varying_id); } } prelude.body.push(Instruction::composite_construct( @@ -968,7 +955,11 @@ impl Writer { .body .push(Instruction::store(varying_id, default_value_id, None)); } - self.void_type + if iface.stage == crate::ShaderStage::Task { + self.get_vec3u_type_id() + } else { + self.void_type + } } else { self.get_handle_type_id(result.ty) } @@ -984,9 +975,8 @@ impl Writer { } self.write_entry_point_mesh_shader_info( iface, - local_invocation_index_id, + local_invocation_index_var_id, ir_module, - &mut prelude, &mut ep_context, )?; } @@ -1224,7 +1214,7 @@ impl Writer { next_id, ir_module, info, - local_invocation_id, + local_invocation_index_id, interface, context.function, ), @@ -1249,7 +1239,21 @@ impl Writer { function.to_words(&mut self.logical_layout.function_definitions); - Ok(function_id) + if let Some(EntryPointContext { + mesh_state: Some(ref mesh_state), + .. + }) = function.entry_point_context + { + self.write_mesh_shader_wrapper(mesh_state, function_id) + } else if let Some(EntryPointContext { + task_payload_variable_id: Some(tp), + .. + }) = function.entry_point_context + { + self.write_task_shader_wrapper(tp, function_id) + } else { + Ok(function_id) + } } fn write_execution_mode( @@ -1272,6 +1276,7 @@ impl Writer { debug_info: &Option, ) -> Result { let mut interface_ids = Vec::new(); + let function_id = self.write_function( &entry_point.function, info, @@ -2030,7 +2035,7 @@ impl Writer { entry_id: Word, ir_module: &crate::Module, info: &FunctionInfo, - local_invocation_id: Option, + local_invocation_index: Option, interface: &mut FunctionInterface, function: &mut Function, ) -> Option { @@ -2038,7 +2043,10 @@ impl Writer { .global_variables .iter() .filter(|&(handle, var)| { - !info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup + let task_exception = (var.space == crate::AddressSpace::TaskPayload) + && interface.stage == crate::ShaderStage::Task; + !info[handle].is_empty() + && (var.space == crate::AddressSpace::WorkGroup || task_exception) }) .map(|(handle, var)| { // It's safe to use `var_id` here, not `access_id`, because only @@ -2055,16 +2063,15 @@ impl Writer { return None; } - let uint3_type_id = self.get_vec3u_type_id(); - let mut pre_if_block = Block::new(entry_id); - let local_invocation_id = if let Some(local_invocation_id) = local_invocation_id { - local_invocation_id + let local_invocation_index = if let Some(local_invocation_index) = local_invocation_index { + local_invocation_index } else { let varying_id = self.id_gen.next(); let class = spirv::StorageClass::Input; - let pointer_type_id = self.get_vec3u_pointer_type_id(class); + let u32_ty_id = self.get_u32_type_id(); + let pointer_type_id = self.get_pointer_type_id(u32_ty_id, class); Instruction::variable(pointer_type_id, varying_id, class, None) .to_words(&mut self.logical_layout.declarations); @@ -2072,39 +2079,29 @@ impl Writer { self.decorate( varying_id, spirv::Decoration::BuiltIn, - &[spirv::BuiltIn::LocalInvocationId as u32], + &[spirv::BuiltIn::LocalInvocationIndex as u32], ); interface.varying_ids.push(varying_id); let id = self.id_gen.next(); pre_if_block .body - .push(Instruction::load(uint3_type_id, id, varying_id, None)); + .push(Instruction::load(u32_ty_id, id, varying_id, None)); id }; - let zero_id = self.get_constant_null(uint3_type_id); - let bool3_type_id = self.get_vec3_bool_type_id(); + let zero_id = self.get_constant_scalar(crate::Literal::U32(0)); let eq_id = self.id_gen.next(); pre_if_block.body.push(Instruction::binary( spirv::Op::IEqual, - bool3_type_id, + self.get_bool_type_id(), eq_id, - local_invocation_id, + local_invocation_index, zero_id, )); - let condition_id = self.id_gen.next(); - let bool_type_id = self.get_bool_type_id(); - pre_if_block.body.push(Instruction::relational( - spirv::Op::All, - bool_type_id, - condition_id, - eq_id, - )); - let merge_id = self.id_gen.next(); pre_if_block.body.push(Instruction::selection_merge( merge_id, @@ -2114,7 +2111,7 @@ impl Writer { let accept_id = self.id_gen.next(); function.consume( pre_if_block, - Instruction::branch_conditional(condition_id, accept_id, merge_id), + Instruction::branch_conditional(eq_id, accept_id, merge_id), ); let accept_block = Block { @@ -2339,7 +2336,6 @@ impl Writer { } if per_primitive && stage == crate::ShaderStage::Fragment { others.push(Decoration::PerPrimitiveEXT); - self.require_mesh_shaders()?; } Ok(BindingDecorations::Location { location, @@ -2351,13 +2347,6 @@ impl Writer { use crate::BuiltIn as Bi; let mut others = ArrayVec::new(); - if matches!( - built_in, - Bi::CullPrimitive | Bi::PointIndex | Bi::LineIndices | Bi::TriangleIndices - ) { - self.require_mesh_shaders()?; - } - let built_in = match built_in { Bi::Position { invariant } => { if invariant { @@ -2469,22 +2458,12 @@ impl Writer { BuiltIn::SubgroupLocalInvocationId } Bi::CullPrimitive => { - self.require_mesh_shaders()?; others.push(Decoration::PerPrimitiveEXT); BuiltIn::CullPrimitiveEXT } - Bi::PointIndex => { - self.require_mesh_shaders()?; - BuiltIn::PrimitivePointIndicesEXT - } - Bi::LineIndices => { - self.require_mesh_shaders()?; - BuiltIn::PrimitiveLineIndicesEXT - } - Bi::TriangleIndices => { - self.require_mesh_shaders()?; - BuiltIn::PrimitiveTriangleIndicesEXT - } + Bi::PointIndex => BuiltIn::PrimitivePointIndicesEXT, + Bi::LineIndices => BuiltIn::PrimitiveLineIndicesEXT, + Bi::TriangleIndices => BuiltIn::PrimitiveTriangleIndicesEXT, // No decoration, this EmitMeshTasksEXT is called at function return Bi::MeshTaskSize => return Ok(BindingDecorations::None), // These aren't normal builtins and don't occur in function output @@ -2831,17 +2810,6 @@ impl Writer { | ir_module.special_types.ray_intersection.is_some(); let has_vertex_return = ir_module.special_types.ray_vertex_return.is_some(); - // Ways mesh shaders are required: - // * Mesh entry point used - checked for - // * Mesh function like setVertex used outside mesh entry point, this is handled when those are written - // * Fragment shader with per primitive data - handled in `map_binding` - let has_mesh_shaders = ir_module.entry_points.iter().any(|entry| { - entry.stage == crate::ShaderStage::Mesh || entry.stage == crate::ShaderStage::Task - }) || ir_module - .global_variables - .iter() - .any(|gvar| gvar.1.space == crate::AddressSpace::TaskPayload); - for (_, &crate::Type { ref inner, .. }) in ir_module.types.iter() { // spirv does not know whether these have vertex return - that is done by us if let &crate::TypeInner::AccelerationStructure { .. } @@ -2868,8 +2836,13 @@ impl Writer { Instruction::extension("SPV_KHR_ray_tracing_position_fetch") .to_words(&mut self.logical_layout.extensions); } - if has_mesh_shaders { - self.require_mesh_shaders()?; + if ir_module.uses_mesh_shaders() { + self.use_extension("SPV_EXT_mesh_shader"); + self.require_any("Mesh Shaders", &[spirv::Capability::MeshShadingEXT])?; + let lang_version = self.lang_version(); + if lang_version.0 <= 1 && lang_version.1 < 4 { + return Err(Error::SpirvVersionTooLow(1, 4)); + } } Instruction::type_void(self.void_type).to_words(&mut self.logical_layout.declarations); Instruction::ext_inst_import(self.gl450_ext_inst_id, "GLSL.std.450") diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 7976a024ab..81259e4ed3 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -278,7 +278,6 @@ impl Writer { let mut needs_f16 = false; let mut needs_dual_source_blending = false; let mut needs_clip_distances = false; - let mut needs_mesh_shaders = false; // Determine which `enable` declarations are needed for (_, ty) in module.types.iter() { @@ -299,25 +298,6 @@ impl Writer { crate::Binding::BuiltIn(crate::BuiltIn::ClipDistance) => { needs_clip_distances = true; } - crate::Binding::Location { - per_primitive: true, - .. - } => { - needs_mesh_shaders = true; - } - crate::Binding::BuiltIn( - crate::BuiltIn::MeshTaskSize - | crate::BuiltIn::CullPrimitive - | crate::BuiltIn::PointIndex - | crate::BuiltIn::LineIndices - | crate::BuiltIn::TriangleIndices - | crate::BuiltIn::VertexCount - | crate::BuiltIn::Vertices - | crate::BuiltIn::PrimitiveCount - | crate::BuiltIn::Primitives, - ) => { - needs_mesh_shaders = true; - } _ => {} } } @@ -326,22 +306,6 @@ impl Writer { } } - if module - .entry_points - .iter() - .any(|ep| matches!(ep.stage, ShaderStage::Mesh | ShaderStage::Task)) - { - needs_mesh_shaders = true; - } - - if module - .global_variables - .iter() - .any(|gv| gv.1.space == crate::AddressSpace::TaskPayload) - { - needs_mesh_shaders = true; - } - // Write required declarations let mut any_written = false; if needs_f16 { @@ -356,7 +320,7 @@ impl Writer { writeln!(self.out, "enable clip_distances;")?; any_written = true; } - if needs_mesh_shaders { + if module.uses_mesh_shaders() { writeln!(self.out, "enable wgpu_mesh_shader;")?; any_written = true; } diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index bc1e72e113..a5a28ad8c0 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -662,6 +662,14 @@ impl super::ShaderStage { Self::Compute | Self::Task | Self::Mesh => true, } } + + /// Mesh or task shader + pub const fn mesh_like(self) -> bool { + match self { + Self::Task | Self::Mesh => true, + Self::Vertex | Self::Fragment | Self::Compute => false, + } + } } #[test] @@ -851,4 +859,64 @@ impl crate::Module { .map(|a| a.with_span_handle(self.global_variables[gv].ty, &self.types)), ) } + + pub fn uses_mesh_shaders(&self) -> bool { + for (_, ty) in self.types.iter() { + match ty.inner { + crate::TypeInner::Struct { ref members, .. } => { + for member in members { + if matches!( + member.binding, + Some(crate::Binding::BuiltIn( + crate::BuiltIn::MeshTaskSize + | crate::BuiltIn::CullPrimitive + | crate::BuiltIn::PointIndex + | crate::BuiltIn::LineIndices + | crate::BuiltIn::TriangleIndices + | crate::BuiltIn::VertexCount + | crate::BuiltIn::Vertices + | crate::BuiltIn::PrimitiveCount + | crate::BuiltIn::Primitives, + )) + ) { + return true; + } + } + } + _ => (), + } + } + if self.entry_points.iter().any(|ep| { + matches!( + ep.stage, + crate::ShaderStage::Mesh | crate::ShaderStage::Task + ) + }) { + return true; + } + if self + .global_variables + .iter() + .any(|gv| gv.1.space == crate::AddressSpace::TaskPayload) + { + return true; + } + false + } +} + +impl crate::MeshOutputTopology { + pub const fn to_builtin(self) -> crate::BuiltIn { + match self { + Self::Points => crate::BuiltIn::PointIndex, + Self::Lines => crate::BuiltIn::LineIndices, + Self::Triangles => crate::BuiltIn::TriangleIndices, + } + } +} + +impl crate::AddressSpace { + pub const fn is_workgroup_like(self) -> bool { + matches!(self, Self::WorkGroup | Self::TaskPayload) + } } diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index d1ae061890..c9d5ebabd2 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -270,7 +270,7 @@ pub struct FunctionInfo { /// `FunctionInfo` implements `core::ops::Index>`, /// so you can simply index this struct with a global handle to retrieve /// its usage information. - global_uses: Box<[GlobalUse]>, + pub global_uses: Box<[GlobalUse]>, /// Information about each expression in this function's body. /// diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index faa0047832..e5b1e9ed2a 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -1087,9 +1087,11 @@ impl super::Validator { } // Task shaders must have a single `MeshTaskSize` output, and nothing else. if ep.stage == crate::ShaderStage::Task { - let ok = result_built_ins.contains(&crate::BuiltIn::MeshTaskSize) - && result_built_ins.len() == 1 - && self.location_mask.is_empty(); + let ok = module.types[fr.ty].inner + == crate::TypeInner::Vector { + size: crate::VectorSize::Tri, + scalar: crate::Scalar::U32, + }; if !ok { return Err(EntryPointError::WrongTaskShaderEntryResult.with_span()); } diff --git a/naga/tests/in/wgsl/mesh-shader-empty.wgsl b/naga/tests/in/wgsl/mesh-shader-empty.wgsl index 98a6bf8448..27ed996611 100644 --- a/naga/tests/in/wgsl/mesh-shader-empty.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-empty.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader-lines.wgsl b/naga/tests/in/wgsl/mesh-shader-lines.wgsl index c475ff1061..c07bc2a7a5 100644 --- a/naga/tests/in/wgsl/mesh-shader-lines.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-lines.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader-points.wgsl b/naga/tests/in/wgsl/mesh-shader-points.wgsl index 84516ee8f2..31429e2df5 100644 --- a/naga/tests/in/wgsl/mesh-shader-points.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-points.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader.wgsl b/naga/tests/in/wgsl/mesh-shader.wgsl index ca2f9c911a..9b57acd10a 100644 --- a/naga/tests/in/wgsl/mesh-shader.wgsl +++ b/naga/tests/in/wgsl/mesh-shader.wgsl @@ -31,13 +31,21 @@ struct PrimitiveInput { var taskPayload: TaskPayload; var workgroupData: f32; +fn helper_reader() -> bool { + return taskPayload.visible; +} +fn helper_writer(value: bool) { + taskPayload.visible = value; +} + @task @payload(taskPayload) @workgroup_size(1) fn ts_main() -> @builtin(mesh_task_size) vec3 { workgroupData = 1.0; taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; + helper_writer(true); + taskPayload.visible = helper_reader(); return vec3(1, 1, 1); } @@ -45,8 +53,8 @@ fn ts_main() -> @builtin(mesh_task_size) vec3 { @task @payload(taskPayload) @workgroup_size(2) -fn ts_divergent(@builtin(local_invocation_index) thread_id: u32) -> @builtin(mesh_task_size) vec3 { - if thread_id == 0 { +fn ts_divergent(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); taskPayload.visible = true; return vec3(1, 1, 1); @@ -82,7 +90,7 @@ fn ms_main() { mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; + mesh_output.primitives[0].cull = !helper_reader(); mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); } @@ -109,9 +117,9 @@ fn ms_no_ts() { // See ts_divergent comment @mesh(mesh_output) -@workgroup_size(1) -fn ms_divergent(@builtin(local_invocation_index) thread_id: u32) { - if thread_id == 0 { +@workgroup_size(2) +fn ms_divergent(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { mesh_output.vertex_count = 3; mesh_output.primitive_count = 1; workgroupData = 2.0; diff --git a/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm b/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm index 4ce80049d2..c64e2c1b76 100644 --- a/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm +++ b/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm @@ -1,14 +1,14 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 131 +; Bound: 127 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %112 "main" %116 +OpEntryPoint GLCompute %112 "main" %115 OpExecutionMode %112 LocalSize 1 1 1 OpDecorate %6 ArrayStride 4 -OpDecorate %116 BuiltIn LocalInvocationId +OpDecorate %115 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeFloat 32 %4 = OpTypeInt 32 1 @@ -56,13 +56,10 @@ OpDecorate %116 BuiltIn LocalInvocationId %102 = OpConstant %3 7 %108 = OpTypePointer Workgroup %5 %114 = OpConstantNull %6 -%115 = OpTypeVector %5 3 -%117 = OpTypePointer Input %115 -%116 = OpVariable %117 Input -%119 = OpConstantNull %115 -%121 = OpTypeBool -%120 = OpTypeVector %121 3 -%126 = OpConstant %5 264 +%116 = OpTypePointer Input %5 +%115 = OpVariable %116 Input +%119 = OpTypeBool +%122 = OpConstant %5 264 %20 = OpFunction %2 None %21 %19 = OpLabel %63 = OpVariable %36 Function %64 @@ -152,20 +149,19 @@ OpFunctionEnd %111 = OpLabel OpBranch %113 %113 = OpLabel -%118 = OpLoad %115 %116 -%122 = OpIEqual %120 %118 %119 -%123 = OpAll %121 %122 -OpSelectionMerge %124 None -OpBranchConditional %123 %125 %124 -%125 = OpLabel +%117 = OpLoad %5 %115 +%118 = OpIEqual %119 %117 %11 +OpSelectionMerge %120 None +OpBranchConditional %118 %121 %120 +%121 = OpLabel OpStore %17 %114 -OpBranch %124 -%124 = OpLabel -OpControlBarrier %30 %30 %126 -OpBranch %127 -%127 = OpLabel -%128 = OpFunctionCall %2 %20 -%129 = OpFunctionCall %2 %100 -%130 = OpFunctionCall %2 %105 +OpBranch %120 +%120 = OpLabel +OpControlBarrier %30 %30 %122 +OpBranch %123 +%123 = OpLabel +%124 = OpFunctionCall %2 %20 +%125 = OpFunctionCall %2 %100 +%126 = OpFunctionCall %2 %105 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm b/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm index d3c2b26be9..c544556caa 100644 --- a/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm +++ b/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm @@ -8,7 +8,7 @@ OpCapability Int64 OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %32 "cs_main" %29 +OpEntryPoint GLCompute %32 "cs_main" %29 %49 OpExecutionMode %32 LocalSize 2 1 1 OpDecorate %5 ArrayStride 8 OpMemberDecorate %8 0 Offset 0 @@ -30,6 +30,7 @@ OpDecorate %19 Binding 2 OpDecorate %20 Block OpMemberDecorate %20 0 Offset 0 OpDecorate %29 BuiltIn LocalInvocationId +OpDecorate %49 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 64 0 %4 = OpTypeInt 64 1 @@ -70,8 +71,8 @@ OpDecorate %29 BuiltIn LocalInvocationId %46 = OpConstantNull %3 %47 = OpConstantNull %5 %48 = OpConstantNull %8 -%49 = OpConstantNull %9 -%50 = OpTypeVector %10 3 +%50 = OpTypePointer Input %7 +%49 = OpVariable %50 Input %55 = OpConstant %7 264 %58 = OpTypeInt 32 1 %57 = OpConstant %58 1 @@ -87,8 +88,8 @@ OpDecorate %29 BuiltIn LocalInvocationId %40 = OpAccessChain %39 %19 %35 OpBranch %45 %45 = OpLabel -%51 = OpIEqual %50 %31 %49 -%52 = OpAll %10 %51 +%51 = OpLoad %7 %49 +%52 = OpIEqual %10 %51 %35 OpSelectionMerge %53 None OpBranchConditional %52 %54 %53 %54 = OpLabel diff --git a/naga/tests/out/spv/wgsl-atomicOps.spvasm b/naga/tests/out/spv/wgsl-atomicOps.spvasm index 839200d769..70f07928d5 100644 --- a/naga/tests/out/spv/wgsl-atomicOps.spvasm +++ b/naga/tests/out/spv/wgsl-atomicOps.spvasm @@ -6,7 +6,7 @@ OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %31 "cs_main" %28 +OpEntryPoint GLCompute %31 "cs_main" %28 %47 OpExecutionMode %31 LocalSize 2 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %7 0 Offset 0 @@ -28,6 +28,7 @@ OpDecorate %18 Binding 2 OpDecorate %19 Block OpMemberDecorate %19 0 Offset 0 OpDecorate %28 BuiltIn LocalInvocationId +OpDecorate %47 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -66,8 +67,8 @@ OpDecorate %28 BuiltIn LocalInvocationId %44 = OpConstantNull %3 %45 = OpConstantNull %5 %46 = OpConstantNull %7 -%47 = OpConstantNull %8 -%48 = OpTypeVector %9 3 +%48 = OpTypePointer Input %3 +%47 = OpVariable %48 Input %53 = OpConstant %3 264 %55 = OpTypePointer StorageBuffer %4 %59 = OpTypePointer Workgroup %4 @@ -79,8 +80,8 @@ OpDecorate %28 BuiltIn LocalInvocationId %39 = OpAccessChain %38 %18 %34 OpBranch %43 %43 = OpLabel -%49 = OpIEqual %48 %30 %47 -%50 = OpAll %9 %49 +%49 = OpLoad %3 %47 +%50 = OpIEqual %9 %49 %34 OpSelectionMerge %51 None OpBranchConditional %50 %52 %51 %52 = OpLabel diff --git a/naga/tests/out/spv/wgsl-globals.spvasm b/naga/tests/out/spv/wgsl-globals.spvasm index 1b5c7a3122..4be9b15701 100644 --- a/naga/tests/out/spv/wgsl-globals.spvasm +++ b/naga/tests/out/spv/wgsl-globals.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 171 +; Bound: 167 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %93 "main" %116 +OpEntryPoint GLCompute %93 "main" %115 OpExecutionMode %93 LocalSize 1 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %9 0 Offset 0 @@ -52,7 +52,7 @@ OpDecorate %49 Block OpMemberDecorate %49 0 Offset 0 OpMemberDecorate %49 0 ColMajor OpMemberDecorate %49 0 MatrixStride 8 -OpDecorate %116 BuiltIn LocalInvocationId +OpDecorate %115 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeBool %4 = OpTypeFloat 32 @@ -126,27 +126,24 @@ OpDecorate %116 BuiltIn LocalInvocationId %111 = OpTypePointer Function %3 %113 = OpConstantNull %5 %114 = OpConstantNull %7 -%115 = OpTypeVector %7 3 -%117 = OpTypePointer Input %115 -%116 = OpVariable %117 Input -%119 = OpConstantNull %115 -%120 = OpTypeVector %3 3 -%125 = OpConstant %7 264 -%128 = OpTypePointer Workgroup %4 -%129 = OpTypePointer Uniform %21 -%130 = OpTypePointer Uniform %20 -%133 = OpTypePointer Uniform %17 -%134 = OpTypePointer Uniform %16 -%135 = OpTypePointer Uniform %12 -%140 = OpConstant %7 7 -%146 = OpConstant %7 6 -%148 = OpTypePointer StorageBuffer %10 -%149 = OpConstant %7 1 -%152 = OpConstant %7 5 -%154 = OpTypePointer Uniform %4 -%155 = OpConstant %7 3 -%158 = OpConstant %7 4 -%170 = OpConstant %23 2 +%116 = OpTypePointer Input %7 +%115 = OpVariable %116 Input +%121 = OpConstant %7 264 +%124 = OpTypePointer Workgroup %4 +%125 = OpTypePointer Uniform %21 +%126 = OpTypePointer Uniform %20 +%129 = OpTypePointer Uniform %17 +%130 = OpTypePointer Uniform %16 +%131 = OpTypePointer Uniform %12 +%136 = OpConstant %7 7 +%142 = OpConstant %7 6 +%144 = OpTypePointer StorageBuffer %10 +%145 = OpConstant %7 1 +%148 = OpConstant %7 5 +%150 = OpTypePointer Uniform %4 +%151 = OpConstant %7 3 +%154 = OpConstant %7 4 +%166 = OpConstant %23 2 %53 = OpFunction %2 None %54 %52 = OpFunctionParameter %8 %51 = OpLabel @@ -198,56 +195,55 @@ OpFunctionEnd %106 = OpAccessChain %105 %48 %60 OpBranch %112 %112 = OpLabel -%118 = OpLoad %115 %116 -%121 = OpIEqual %120 %118 %119 -%122 = OpAll %3 %121 -OpSelectionMerge %123 None -OpBranchConditional %122 %124 %123 -%124 = OpLabel +%117 = OpLoad %7 %115 +%118 = OpIEqual %3 %117 %60 +OpSelectionMerge %119 None +OpBranchConditional %118 %120 %119 +%120 = OpLabel OpStore %26 %113 OpStore %28 %114 -OpBranch %123 -%123 = OpLabel -OpControlBarrier %18 %18 %125 -OpBranch %126 -%126 = OpLabel -%127 = OpFunctionCall %2 %57 -%131 = OpAccessChain %130 %106 %60 %60 -%132 = OpLoad %20 %131 -%136 = OpAccessChain %135 %104 %60 %60 %60 -%137 = OpLoad %12 %136 -%138 = OpMatrixTimesVector %10 %132 %137 -%139 = OpCompositeExtract %4 %138 0 -%141 = OpAccessChain %128 %26 %140 -OpStore %141 %139 -%142 = OpLoad %15 %102 -%143 = OpLoad %8 %100 -%144 = OpMatrixTimesVector %10 %142 %143 -%145 = OpCompositeExtract %4 %144 0 -%147 = OpAccessChain %128 %26 %146 -OpStore %147 %145 -%150 = OpAccessChain %73 %96 %149 %149 -%151 = OpLoad %4 %150 -%153 = OpAccessChain %128 %26 %152 -OpStore %153 %151 -%156 = OpAccessChain %154 %98 %60 %155 +OpBranch %119 +%119 = OpLabel +OpControlBarrier %18 %18 %121 +OpBranch %122 +%122 = OpLabel +%123 = OpFunctionCall %2 %57 +%127 = OpAccessChain %126 %106 %60 %60 +%128 = OpLoad %20 %127 +%132 = OpAccessChain %131 %104 %60 %60 %60 +%133 = OpLoad %12 %132 +%134 = OpMatrixTimesVector %10 %128 %133 +%135 = OpCompositeExtract %4 %134 0 +%137 = OpAccessChain %124 %26 %136 +OpStore %137 %135 +%138 = OpLoad %15 %102 +%139 = OpLoad %8 %100 +%140 = OpMatrixTimesVector %10 %138 %139 +%141 = OpCompositeExtract %4 %140 0 +%143 = OpAccessChain %124 %26 %142 +OpStore %143 %141 +%146 = OpAccessChain %73 %96 %145 %145 +%147 = OpLoad %4 %146 +%149 = OpAccessChain %124 %26 %148 +OpStore %149 %147 +%152 = OpAccessChain %150 %98 %60 %151 +%153 = OpLoad %4 %152 +%155 = OpAccessChain %124 %26 %154 +OpStore %155 %153 +%156 = OpAccessChain %73 %94 %145 %157 = OpLoad %4 %156 -%159 = OpAccessChain %128 %26 %158 -OpStore %159 %157 -%160 = OpAccessChain %73 %94 %149 -%161 = OpLoad %4 %160 -%162 = OpAccessChain %128 %26 %155 -OpStore %162 %161 -%163 = OpAccessChain %73 %94 %60 %60 -%164 = OpLoad %4 %163 -%165 = OpAccessChain %128 %26 %18 +%158 = OpAccessChain %124 %26 %151 +OpStore %158 %157 +%159 = OpAccessChain %73 %94 %60 %60 +%160 = OpLoad %4 %159 +%161 = OpAccessChain %124 %26 %18 +OpStore %161 %160 +%162 = OpAccessChain %73 %94 %145 +OpStore %162 %107 +%163 = OpArrayLength %7 %33 0 +%164 = OpConvertUToF %4 %163 +%165 = OpAccessChain %124 %26 %145 OpStore %165 %164 -%166 = OpAccessChain %73 %94 %149 -OpStore %166 %107 -%167 = OpArrayLength %7 %33 0 -%168 = OpConvertUToF %4 %167 -%169 = OpAccessChain %128 %26 %149 -OpStore %169 %168 -OpAtomicStore %28 %170 %60 %18 +OpAtomicStore %28 %166 %60 %18 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-interface.compute.spvasm b/naga/tests/out/spv/wgsl-interface.compute.spvasm index 912d28d5b0..c3fb080b73 100644 --- a/naga/tests/out/spv/wgsl-interface.compute.spvasm +++ b/naga/tests/out/spv/wgsl-interface.compute.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 53 +; Bound: 50 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -43,12 +43,10 @@ OpDecorate %27 BuiltIn NumWorkgroups %27 = OpVariable %18 Input %30 = OpTypeFunction %2 %32 = OpConstantNull %9 -%33 = OpConstantNull %11 -%34 = OpTypeVector %8 3 -%39 = OpConstant %6 2 -%40 = OpConstant %6 264 -%42 = OpTypePointer Workgroup %6 -%51 = OpConstant %6 0 +%33 = OpConstant %6 0 +%37 = OpConstant %6 2 +%38 = OpConstant %6 264 +%40 = OpTypePointer Workgroup %6 %29 = OpFunction %2 None %30 %16 = OpLabel %19 = OpLoad %11 %17 @@ -58,26 +56,25 @@ OpDecorate %27 BuiltIn NumWorkgroups %28 = OpLoad %11 %27 OpBranch %31 %31 = OpLabel -%35 = OpIEqual %34 %21 %33 -%36 = OpAll %8 %35 -OpSelectionMerge %37 None -OpBranchConditional %36 %38 %37 -%38 = OpLabel +%34 = OpIEqual %8 %24 %33 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %35 +%36 = OpLabel OpStore %14 %32 -OpBranch %37 -%37 = OpLabel -OpControlBarrier %39 %39 %40 -OpBranch %41 -%41 = OpLabel -%43 = OpCompositeExtract %6 %19 0 -%44 = OpCompositeExtract %6 %21 0 -%45 = OpIAdd %6 %43 %44 -%46 = OpIAdd %6 %45 %24 -%47 = OpCompositeExtract %6 %26 0 +OpBranch %35 +%35 = OpLabel +OpControlBarrier %37 %37 %38 +OpBranch %39 +%39 = OpLabel +%41 = OpCompositeExtract %6 %19 0 +%42 = OpCompositeExtract %6 %21 0 +%43 = OpIAdd %6 %41 %42 +%44 = OpIAdd %6 %43 %24 +%45 = OpCompositeExtract %6 %26 0 +%46 = OpIAdd %6 %44 %45 +%47 = OpCompositeExtract %6 %28 0 %48 = OpIAdd %6 %46 %47 -%49 = OpCompositeExtract %6 %28 0 -%50 = OpIAdd %6 %48 %49 -%52 = OpAccessChain %42 %14 %51 -OpStore %52 %50 +%49 = OpAccessChain %40 %14 %33 +OpStore %49 %48 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm index 3783413287..d9f4f96505 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 100 +; Bound: 130 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %20 "ts_main" %15 -OpEntryPoint MeshEXT %44 "ms_main" %15 %30 %40 %43 %17 %47 -OpExecutionMode %20 LocalSize 1 1 1 -OpExecutionMode %44 LocalSize 1 1 1 -OpExecutionMode %44 OutputTrianglesEXT -OpExecutionMode %44 OutputVertices 3 -OpExecutionMode %44 OutputPrimitivesEXT 1 -OpDecorate %30 BuiltIn LocalInvocationIndex -OpMemberDecorate %37 0 BuiltIn Position -OpDecorate %37 Block -OpDecorate %43 BuiltIn PrimitiveTriangleIndicesEXT +OpEntryPoint TaskEXT %36 "ts_main" %15 %25 +OpEntryPoint MeshEXT %82 "ms_main" %15 %64 %69 %72 %17 %76 +OpExecutionMode %36 LocalSize 64 1 1 +OpExecutionMode %82 LocalSize 64 1 1 +OpExecutionMode %82 OutputTrianglesEXT +OpExecutionMode %82 OutputVertices 3 +OpExecutionMode %82 OutputPrimitivesEXT 1 +OpDecorate %64 BuiltIn LocalInvocationIndex +OpMemberDecorate %66 0 BuiltIn Position +OpDecorate %66 Block +OpDecorate %72 BuiltIn PrimitiveTriangleIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %9 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %14 0 Offset 0 OpMemberDecorate %14 1 Offset 48 OpMemberDecorate %14 2 Offset 64 OpMemberDecorate %14 3 Offset 68 -OpDecorate %47 BuiltIn LocalInvocationId +OpDecorate %25 BuiltIn LocalInvocationIndex +OpDecorate %76 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -45,119 +47,157 @@ OpDecorate %47 BuiltIn LocalInvocationId %15 = OpVariable %16 TaskPayloadWorkgroupEXT %18 = OpTypePointer Workgroup %14 %17 = OpVariable %18 Workgroup -%21 = OpTypeFunction %2 +%21 = OpTypeFunction %8 %22 = OpConstantComposite %8 %13 %13 %13 -%24 = OpConstant %3 2 -%25 = OpConstant %3 264 -%31 = OpTypePointer Input %3 -%30 = OpVariable %31 Input -%35 = OpTypePointer Function %3 -%37 = OpTypeStruct %5 -%38 = OpTypeArray %37 %11 -%39 = OpTypePointer Output %38 -%40 = OpVariable %39 Output -%41 = OpTypeArray %8 %13 -%42 = OpTypePointer Output %41 -%43 = OpVariable %42 Output -%46 = OpConstantNull %14 -%48 = OpTypePointer Input %8 -%47 = OpVariable %48 Input -%50 = OpConstantNull %8 -%52 = OpTypeBool -%51 = OpTypeVector %52 3 -%59 = OpTypePointer Workgroup %3 -%66 = OpTypePointer Workgroup %10 -%67 = OpConstant %3 0 -%69 = OpTypePointer Workgroup %12 -%76 = OpTypePointer Workgroup %5 -%79 = OpTypePointer Output %5 -%89 = OpTypePointer Workgroup %8 -%92 = OpTypePointer Output %8 -%20 = OpFunction %2 None %21 +%24 = OpConstantNull %4 +%26 = OpTypePointer Input %3 +%25 = OpVariable %26 Input +%28 = OpConstant %3 0 +%30 = OpTypeBool +%33 = OpConstant %3 2 +%34 = OpConstant %3 264 +%37 = OpTypeFunction %2 +%41 = OpTypeInt 64 0 +%40 = OpConstant %41 256 +%42 = OpConstant %41 1024 +%64 = OpVariable %26 Input +%65 = OpConstant %3 64 +%66 = OpTypeStruct %5 +%67 = OpTypeArray %66 %11 +%68 = OpTypePointer Output %67 +%69 = OpVariable %68 Output +%70 = OpTypeArray %8 %13 +%71 = OpTypePointer Output %70 +%72 = OpVariable %71 Output +%75 = OpConstantNull %14 +%76 = OpVariable %26 Input +%86 = OpTypePointer Function %3 +%90 = OpTypePointer Workgroup %3 +%97 = OpTypePointer Workgroup %10 +%99 = OpTypePointer Workgroup %12 +%106 = OpTypePointer Workgroup %5 +%109 = OpTypePointer Output %5 +%119 = OpTypePointer Workgroup %8 +%122 = OpTypePointer Output %8 +%20 = OpFunction %8 None %21 %19 = OpLabel OpBranch %23 %23 = OpLabel -OpControlBarrier %24 %24 %25 -%26 = OpCompositeExtract %3 %22 0 -%27 = OpCompositeExtract %3 %22 1 -%28 = OpCompositeExtract %3 %22 2 -OpEmitMeshTasksEXT %26 %27 %28 %15 +%27 = OpLoad %3 %25 +%29 = OpIEqual %30 %27 %28 +OpSelectionMerge %31 None +OpBranchConditional %29 %32 %31 +%32 = OpLabel +OpStore %15 %24 +OpBranch %31 +%31 = OpLabel +OpControlBarrier %33 %33 %34 +OpBranch %35 +%35 = OpLabel +OpReturnValue %22 OpFunctionEnd -%44 = OpFunction %2 None %21 -%29 = OpLabel -%33 = OpVariable %35 Function -%34 = OpVariable %35 Function -%32 = OpLoad %3 %30 -OpBranch %45 -%45 = OpLabel -%49 = OpLoad %8 %47 -%53 = OpIEqual %51 %49 %50 -%54 = OpAll %52 %53 -OpSelectionMerge %55 None -OpBranchConditional %54 %56 %55 -%56 = OpLabel -OpStore %17 %46 -OpBranch %55 -%55 = OpLabel -OpControlBarrier %24 %24 %25 -OpBranch %57 -%57 = OpLabel -OpBranch %36 -%36 = OpLabel -OpControlBarrier %24 %24 %25 -%58 = OpAccessChain %59 %17 %24 -%60 = OpLoad %3 %58 -%61 = OpExtInst %3 %1 UMin %60 %11 -%62 = OpAccessChain %59 %17 %11 -%63 = OpLoad %3 %62 -%64 = OpExtInst %3 %1 UMin %63 %13 -%65 = OpAccessChain %66 %17 %67 -%68 = OpAccessChain %69 %17 %13 -OpControlBarrier %24 %24 %25 -OpSetMeshOutputsEXT %61 %64 -OpStore %33 %32 -OpBranch %70 -%70 = OpLabel -OpLoopMerge %72 %81 None -OpBranch %80 +%36 = OpFunction %2 None %37 +%38 = OpLabel +%39 = OpFunctionCall %8 %20 +OpControlBarrier %33 %33 %34 +%46 = OpCompositeExtract %3 %39 0 +%43 = OpUConvert %41 %46 +%47 = OpCompositeExtract %3 %39 1 +%44 = OpUConvert %41 %47 +%48 = OpCompositeExtract %3 %39 2 +%45 = OpUConvert %41 %48 +%49 = OpIMul %41 %43 %44 +%50 = OpIMul %41 %49 %45 +%51 = OpUGreaterThanEqual %30 %50 %42 +%52 = OpUGreaterThanEqual %30 %43 %40 +%53 = OpUGreaterThanEqual %30 %44 %40 +%54 = OpUGreaterThanEqual %30 %45 %40 +%55 = OpLogicalOr %30 %51 %52 +%56 = OpLogicalOr %30 %55 %53 +%57 = OpLogicalOr %30 %56 %54 +%58 = OpCompositeConstruct %8 %28 %28 %28 +%59 = OpSelect %8 %57 %58 %39 +%60 = OpCompositeExtract %3 %59 0 +%61 = OpCompositeExtract %3 %59 1 +%62 = OpCompositeExtract %3 %59 2 +OpEmitMeshTasksEXT %60 %61 %62 %15 +OpFunctionEnd +%73 = OpFunction %2 None %37 +%63 = OpLabel +OpBranch %74 +%74 = OpLabel +%77 = OpLoad %3 %76 +%78 = OpIEqual %30 %77 %28 +OpSelectionMerge %79 None +OpBranchConditional %78 %80 %79 %80 = OpLabel -%83 = OpLoad %3 %33 -%84 = OpULessThan %52 %83 %61 -OpBranchConditional %84 %82 %72 -%82 = OpLabel -%74 = OpLoad %3 %33 -%75 = OpAccessChain %76 %65 %74 %67 -%77 = OpLoad %5 %75 -%78 = OpAccessChain %79 %40 %74 %67 -OpStore %78 %77 +OpStore %17 %75 +OpBranch %79 +%79 = OpLabel +OpControlBarrier %33 %33 %34 OpBranch %81 %81 = OpLabel -%85 = OpLoad %3 %33 -%86 = OpIAdd %3 %85 %13 -OpStore %33 %86 -OpBranch %70 -%72 = OpLabel -OpStore %34 %32 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %94 None -OpBranch %93 -%93 = OpLabel -%96 = OpLoad %3 %34 -%97 = OpULessThan %52 %96 %64 -OpBranchConditional %97 %95 %73 -%95 = OpLabel -%87 = OpLoad %3 %34 -%88 = OpAccessChain %89 %68 %87 %67 -%90 = OpLoad %8 %88 -%91 = OpAccessChain %92 %43 %87 -OpStore %91 %90 -OpBranch %94 -%94 = OpLabel -%98 = OpLoad %3 %34 -%99 = OpIAdd %3 %98 %13 -OpStore %34 %99 -OpBranch %71 -%73 = OpLabel +OpReturn +OpFunctionEnd +%82 = OpFunction %2 None %37 +%83 = OpLabel +%84 = OpVariable %86 Function +%85 = OpVariable %86 Function +%87 = OpLoad %3 %64 +%88 = OpFunctionCall %2 %73 +OpControlBarrier %33 %33 %34 +%89 = OpAccessChain %90 %17 %33 +%91 = OpLoad %3 %89 +%92 = OpExtInst %3 %1 UMin %91 %11 +%93 = OpAccessChain %90 %17 %11 +%94 = OpLoad %3 %93 +%95 = OpExtInst %3 %1 UMin %94 %13 +%96 = OpAccessChain %97 %17 %28 +%98 = OpAccessChain %99 %17 %13 +OpSetMeshOutputsEXT %92 %95 +OpStore %84 %87 +OpBranch %100 +%100 = OpLabel +OpLoopMerge %102 %111 None +OpBranch %110 +%110 = OpLabel +%113 = OpLoad %3 %84 +%114 = OpULessThan %30 %113 %92 +OpBranchConditional %114 %112 %102 +%112 = OpLabel +%104 = OpLoad %3 %84 +%105 = OpAccessChain %106 %96 %104 %28 +%107 = OpLoad %5 %105 +%108 = OpAccessChain %109 %69 %104 %28 +OpStore %108 %107 +OpBranch %111 +%111 = OpLabel +%115 = OpLoad %3 %84 +%116 = OpIAdd %3 %115 %65 +OpStore %84 %116 +OpBranch %100 +%102 = OpLabel +OpStore %85 %87 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %124 None +OpBranch %123 +%123 = OpLabel +%126 = OpLoad %3 %85 +%127 = OpULessThan %30 %126 %95 +OpBranchConditional %127 %125 %103 +%125 = OpLabel +%117 = OpLoad %3 %85 +%118 = OpAccessChain %119 %98 %117 %28 +%120 = OpLoad %8 %118 +%121 = OpAccessChain %122 %72 %117 +OpStore %121 %120 +OpBranch %124 +%124 = OpLabel +%128 = OpLoad %3 %85 +%129 = OpIAdd %3 %128 %65 +OpStore %85 %129 +OpBranch %101 +%103 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm index 0bf21f6ada..08ca704b5b 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 101 +; Bound: 131 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %21 "ts_main" %16 -OpEntryPoint MeshEXT %44 "ms_main" %16 %30 %40 %43 %18 %47 -OpExecutionMode %21 LocalSize 1 1 1 -OpExecutionMode %44 LocalSize 1 1 1 -OpExecutionMode %44 OutputLinesEXT -OpExecutionMode %44 OutputVertices 2 -OpExecutionMode %44 OutputPrimitivesEXT 1 -OpDecorate %30 BuiltIn LocalInvocationIndex -OpMemberDecorate %37 0 BuiltIn Position -OpDecorate %37 Block -OpDecorate %43 BuiltIn PrimitiveLineIndicesEXT +OpEntryPoint TaskEXT %36 "ts_main" %16 %26 +OpEntryPoint MeshEXT %82 "ms_main" %16 %64 %69 %72 %18 %76 +OpExecutionMode %36 LocalSize 64 1 1 +OpExecutionMode %82 LocalSize 64 1 1 +OpExecutionMode %82 OutputLinesEXT +OpExecutionMode %82 OutputVertices 2 +OpExecutionMode %82 OutputPrimitivesEXT 1 +OpDecorate %64 BuiltIn LocalInvocationIndex +OpMemberDecorate %66 0 BuiltIn Position +OpDecorate %66 Block +OpDecorate %72 BuiltIn PrimitiveLineIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %9 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %15 0 Offset 0 OpMemberDecorate %15 1 Offset 32 OpMemberDecorate %15 2 Offset 40 OpMemberDecorate %15 3 Offset 44 -OpDecorate %47 BuiltIn LocalInvocationId +OpDecorate %26 BuiltIn LocalInvocationIndex +OpDecorate %76 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -46,119 +48,157 @@ OpDecorate %47 BuiltIn LocalInvocationId %16 = OpVariable %17 TaskPayloadWorkgroupEXT %19 = OpTypePointer Workgroup %15 %18 = OpVariable %19 Workgroup -%22 = OpTypeFunction %2 +%22 = OpTypeFunction %10 %23 = OpConstantComposite %10 %14 %14 %14 -%25 = OpConstant %3 264 -%31 = OpTypePointer Input %3 -%30 = OpVariable %31 Input -%35 = OpTypePointer Function %3 -%37 = OpTypeStruct %5 -%38 = OpTypeArray %37 %12 -%39 = OpTypePointer Output %38 -%40 = OpVariable %39 Output -%41 = OpTypeArray %8 %14 -%42 = OpTypePointer Output %41 -%43 = OpVariable %42 Output -%46 = OpConstantNull %15 -%48 = OpTypePointer Input %10 -%47 = OpVariable %48 Input -%50 = OpConstantNull %10 -%52 = OpTypeBool -%51 = OpTypeVector %52 3 -%59 = OpTypePointer Workgroup %3 -%63 = OpConstant %3 3 -%67 = OpTypePointer Workgroup %11 -%68 = OpConstant %3 0 -%70 = OpTypePointer Workgroup %13 -%77 = OpTypePointer Workgroup %5 -%80 = OpTypePointer Output %5 -%90 = OpTypePointer Workgroup %8 -%93 = OpTypePointer Output %8 -%21 = OpFunction %2 None %22 +%25 = OpConstantNull %4 +%27 = OpTypePointer Input %3 +%26 = OpVariable %27 Input +%29 = OpConstant %3 0 +%31 = OpTypeBool +%34 = OpConstant %3 264 +%37 = OpTypeFunction %2 +%41 = OpTypeInt 64 0 +%40 = OpConstant %41 256 +%42 = OpConstant %41 1024 +%64 = OpVariable %27 Input +%65 = OpConstant %3 64 +%66 = OpTypeStruct %5 +%67 = OpTypeArray %66 %12 +%68 = OpTypePointer Output %67 +%69 = OpVariable %68 Output +%70 = OpTypeArray %8 %14 +%71 = OpTypePointer Output %70 +%72 = OpVariable %71 Output +%75 = OpConstantNull %15 +%76 = OpVariable %27 Input +%86 = OpTypePointer Function %3 +%90 = OpTypePointer Workgroup %3 +%94 = OpConstant %3 3 +%98 = OpTypePointer Workgroup %11 +%100 = OpTypePointer Workgroup %13 +%107 = OpTypePointer Workgroup %5 +%110 = OpTypePointer Output %5 +%120 = OpTypePointer Workgroup %8 +%123 = OpTypePointer Output %8 +%21 = OpFunction %10 None %22 %20 = OpLabel OpBranch %24 %24 = OpLabel -OpControlBarrier %12 %12 %25 -%26 = OpCompositeExtract %3 %23 0 -%27 = OpCompositeExtract %3 %23 1 -%28 = OpCompositeExtract %3 %23 2 -OpEmitMeshTasksEXT %26 %27 %28 %16 +%28 = OpLoad %3 %26 +%30 = OpIEqual %31 %28 %29 +OpSelectionMerge %32 None +OpBranchConditional %30 %33 %32 +%33 = OpLabel +OpStore %16 %25 +OpBranch %32 +%32 = OpLabel +OpControlBarrier %12 %12 %34 +OpBranch %35 +%35 = OpLabel +OpReturnValue %23 OpFunctionEnd -%44 = OpFunction %2 None %22 -%29 = OpLabel -%33 = OpVariable %35 Function -%34 = OpVariable %35 Function -%32 = OpLoad %3 %30 -OpBranch %45 -%45 = OpLabel -%49 = OpLoad %10 %47 -%53 = OpIEqual %51 %49 %50 -%54 = OpAll %52 %53 -OpSelectionMerge %55 None -OpBranchConditional %54 %56 %55 -%56 = OpLabel -OpStore %18 %46 -OpBranch %55 -%55 = OpLabel -OpControlBarrier %12 %12 %25 -OpBranch %57 -%57 = OpLabel -OpBranch %36 -%36 = OpLabel -OpControlBarrier %12 %12 %25 -%58 = OpAccessChain %59 %18 %12 -%60 = OpLoad %3 %58 -%61 = OpExtInst %3 %1 UMin %60 %12 -%62 = OpAccessChain %59 %18 %63 -%64 = OpLoad %3 %62 -%65 = OpExtInst %3 %1 UMin %64 %14 -%66 = OpAccessChain %67 %18 %68 -%69 = OpAccessChain %70 %18 %14 -OpControlBarrier %12 %12 %25 -OpSetMeshOutputsEXT %61 %65 -OpStore %33 %32 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %82 None +%36 = OpFunction %2 None %37 +%38 = OpLabel +%39 = OpFunctionCall %10 %21 +OpControlBarrier %12 %12 %34 +%46 = OpCompositeExtract %3 %39 0 +%43 = OpUConvert %41 %46 +%47 = OpCompositeExtract %3 %39 1 +%44 = OpUConvert %41 %47 +%48 = OpCompositeExtract %3 %39 2 +%45 = OpUConvert %41 %48 +%49 = OpIMul %41 %43 %44 +%50 = OpIMul %41 %49 %45 +%51 = OpUGreaterThanEqual %31 %50 %42 +%52 = OpUGreaterThanEqual %31 %43 %40 +%53 = OpUGreaterThanEqual %31 %44 %40 +%54 = OpUGreaterThanEqual %31 %45 %40 +%55 = OpLogicalOr %31 %51 %52 +%56 = OpLogicalOr %31 %55 %53 +%57 = OpLogicalOr %31 %56 %54 +%58 = OpCompositeConstruct %10 %29 %29 %29 +%59 = OpSelect %10 %57 %58 %39 +%60 = OpCompositeExtract %3 %59 0 +%61 = OpCompositeExtract %3 %59 1 +%62 = OpCompositeExtract %3 %59 2 +OpEmitMeshTasksEXT %60 %61 %62 %16 +OpFunctionEnd +%73 = OpFunction %2 None %37 +%63 = OpLabel +OpBranch %74 +%74 = OpLabel +%77 = OpLoad %3 %76 +%78 = OpIEqual %31 %77 %29 +OpSelectionMerge %79 None +OpBranchConditional %78 %80 %79 +%80 = OpLabel +OpStore %18 %75 +OpBranch %79 +%79 = OpLabel +OpControlBarrier %12 %12 %34 OpBranch %81 %81 = OpLabel -%84 = OpLoad %3 %33 -%85 = OpULessThan %52 %84 %61 -OpBranchConditional %85 %83 %73 +OpReturn +OpFunctionEnd +%82 = OpFunction %2 None %37 %83 = OpLabel -%75 = OpLoad %3 %33 -%76 = OpAccessChain %77 %66 %75 %68 -%78 = OpLoad %5 %76 -%79 = OpAccessChain %80 %40 %75 %68 -OpStore %79 %78 -OpBranch %82 -%82 = OpLabel -%86 = OpLoad %3 %33 -%87 = OpIAdd %3 %86 %14 -OpStore %33 %87 -OpBranch %71 -%73 = OpLabel -OpStore %34 %32 -OpBranch %72 -%72 = OpLabel -OpLoopMerge %74 %95 None -OpBranch %94 -%94 = OpLabel -%97 = OpLoad %3 %34 -%98 = OpULessThan %52 %97 %65 -OpBranchConditional %98 %96 %74 -%96 = OpLabel -%88 = OpLoad %3 %34 -%89 = OpAccessChain %90 %69 %88 %68 -%91 = OpLoad %8 %89 -%92 = OpAccessChain %93 %43 %88 -OpStore %92 %91 -OpBranch %95 -%95 = OpLabel -%99 = OpLoad %3 %34 -%100 = OpIAdd %3 %99 %14 -OpStore %34 %100 -OpBranch %72 -%74 = OpLabel +%84 = OpVariable %86 Function +%85 = OpVariable %86 Function +%87 = OpLoad %3 %64 +%88 = OpFunctionCall %2 %73 +OpControlBarrier %12 %12 %34 +%89 = OpAccessChain %90 %18 %12 +%91 = OpLoad %3 %89 +%92 = OpExtInst %3 %1 UMin %91 %12 +%93 = OpAccessChain %90 %18 %94 +%95 = OpLoad %3 %93 +%96 = OpExtInst %3 %1 UMin %95 %14 +%97 = OpAccessChain %98 %18 %29 +%99 = OpAccessChain %100 %18 %14 +OpSetMeshOutputsEXT %92 %96 +OpStore %84 %87 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %112 None +OpBranch %111 +%111 = OpLabel +%114 = OpLoad %3 %84 +%115 = OpULessThan %31 %114 %92 +OpBranchConditional %115 %113 %103 +%113 = OpLabel +%105 = OpLoad %3 %84 +%106 = OpAccessChain %107 %97 %105 %29 +%108 = OpLoad %5 %106 +%109 = OpAccessChain %110 %69 %105 %29 +OpStore %109 %108 +OpBranch %112 +%112 = OpLabel +%116 = OpLoad %3 %84 +%117 = OpIAdd %3 %116 %65 +OpStore %84 %117 +OpBranch %101 +%103 = OpLabel +OpStore %85 %87 +OpBranch %102 +%102 = OpLabel +OpLoopMerge %104 %125 None +OpBranch %124 +%124 = OpLabel +%127 = OpLoad %3 %85 +%128 = OpULessThan %31 %127 %96 +OpBranchConditional %128 %126 %104 +%126 = OpLabel +%118 = OpLoad %3 %85 +%119 = OpAccessChain %120 %99 %118 %29 +%121 = OpLoad %8 %119 +%122 = OpAccessChain %123 %72 %118 +OpStore %122 %121 +OpBranch %125 +%125 = OpLabel +%129 = OpLoad %3 %85 +%130 = OpIAdd %3 %129 %65 +OpStore %85 %130 +OpBranch %102 +%104 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm index 40417f6701..2db0562c26 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 99 +; Bound: 129 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %19 "ts_main" %14 -OpEntryPoint MeshEXT %43 "ms_main" %14 %29 %39 %42 %16 %46 -OpExecutionMode %19 LocalSize 1 1 1 -OpExecutionMode %43 LocalSize 1 1 1 -OpExecutionMode %43 OutputPoints -OpExecutionMode %43 OutputVertices 1 -OpExecutionMode %43 OutputPrimitivesEXT 1 -OpDecorate %29 BuiltIn LocalInvocationIndex -OpMemberDecorate %36 0 BuiltIn Position -OpDecorate %36 Block -OpDecorate %42 BuiltIn PrimitivePointIndicesEXT +OpEntryPoint TaskEXT %35 "ts_main" %14 %24 +OpEntryPoint MeshEXT %81 "ms_main" %14 %63 %68 %71 %16 %75 +OpExecutionMode %35 LocalSize 64 1 1 +OpExecutionMode %81 LocalSize 64 1 1 +OpExecutionMode %81 OutputPoints +OpExecutionMode %81 OutputVertices 1 +OpExecutionMode %81 OutputPrimitivesEXT 1 +OpDecorate %63 BuiltIn LocalInvocationIndex +OpMemberDecorate %65 0 BuiltIn Position +OpDecorate %65 Block +OpDecorate %71 BuiltIn PrimitivePointIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %8 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %13 0 Offset 0 OpMemberDecorate %13 1 Offset 16 OpMemberDecorate %13 2 Offset 20 OpMemberDecorate %13 3 Offset 24 -OpDecorate %46 BuiltIn LocalInvocationId +OpDecorate %24 BuiltIn LocalInvocationIndex +OpDecorate %75 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -44,119 +46,157 @@ OpDecorate %46 BuiltIn LocalInvocationId %14 = OpVariable %15 TaskPayloadWorkgroupEXT %17 = OpTypePointer Workgroup %13 %16 = OpVariable %17 Workgroup -%20 = OpTypeFunction %2 +%20 = OpTypeFunction %9 %21 = OpConstantComposite %9 %11 %11 %11 -%23 = OpConstant %3 2 -%24 = OpConstant %3 264 -%30 = OpTypePointer Input %3 -%29 = OpVariable %30 Input -%34 = OpTypePointer Function %3 -%36 = OpTypeStruct %5 -%37 = OpTypeArray %36 %11 -%38 = OpTypePointer Output %37 -%39 = OpVariable %38 Output -%40 = OpTypeArray %3 %11 -%41 = OpTypePointer Output %40 -%42 = OpVariable %41 Output -%45 = OpConstantNull %13 -%47 = OpTypePointer Input %9 -%46 = OpVariable %47 Input -%49 = OpConstantNull %9 -%51 = OpTypeBool -%50 = OpTypeVector %51 3 -%58 = OpTypePointer Workgroup %3 -%62 = OpConstant %3 3 -%66 = OpTypePointer Workgroup %10 -%67 = OpConstant %3 0 -%69 = OpTypePointer Workgroup %12 -%76 = OpTypePointer Workgroup %5 -%79 = OpTypePointer Output %5 -%91 = OpTypePointer Output %3 -%19 = OpFunction %2 None %20 +%23 = OpConstantNull %4 +%25 = OpTypePointer Input %3 +%24 = OpVariable %25 Input +%27 = OpConstant %3 0 +%29 = OpTypeBool +%32 = OpConstant %3 2 +%33 = OpConstant %3 264 +%36 = OpTypeFunction %2 +%40 = OpTypeInt 64 0 +%39 = OpConstant %40 256 +%41 = OpConstant %40 1024 +%63 = OpVariable %25 Input +%64 = OpConstant %3 64 +%65 = OpTypeStruct %5 +%66 = OpTypeArray %65 %11 +%67 = OpTypePointer Output %66 +%68 = OpVariable %67 Output +%69 = OpTypeArray %3 %11 +%70 = OpTypePointer Output %69 +%71 = OpVariable %70 Output +%74 = OpConstantNull %13 +%75 = OpVariable %25 Input +%85 = OpTypePointer Function %3 +%89 = OpTypePointer Workgroup %3 +%93 = OpConstant %3 3 +%97 = OpTypePointer Workgroup %10 +%99 = OpTypePointer Workgroup %12 +%106 = OpTypePointer Workgroup %5 +%109 = OpTypePointer Output %5 +%121 = OpTypePointer Output %3 +%19 = OpFunction %9 None %20 %18 = OpLabel OpBranch %22 %22 = OpLabel -OpControlBarrier %23 %23 %24 -%25 = OpCompositeExtract %3 %21 0 -%26 = OpCompositeExtract %3 %21 1 -%27 = OpCompositeExtract %3 %21 2 -OpEmitMeshTasksEXT %25 %26 %27 %14 +%26 = OpLoad %3 %24 +%28 = OpIEqual %29 %26 %27 +OpSelectionMerge %30 None +OpBranchConditional %28 %31 %30 +%31 = OpLabel +OpStore %14 %23 +OpBranch %30 +%30 = OpLabel +OpControlBarrier %32 %32 %33 +OpBranch %34 +%34 = OpLabel +OpReturnValue %21 OpFunctionEnd -%43 = OpFunction %2 None %20 -%28 = OpLabel -%32 = OpVariable %34 Function -%33 = OpVariable %34 Function -%31 = OpLoad %3 %29 -OpBranch %44 -%44 = OpLabel -%48 = OpLoad %9 %46 -%52 = OpIEqual %50 %48 %49 -%53 = OpAll %51 %52 -OpSelectionMerge %54 None -OpBranchConditional %53 %55 %54 -%55 = OpLabel -OpStore %16 %45 -OpBranch %54 -%54 = OpLabel -OpControlBarrier %23 %23 %24 -OpBranch %56 -%56 = OpLabel -OpBranch %35 -%35 = OpLabel -OpControlBarrier %23 %23 %24 -%57 = OpAccessChain %58 %16 %23 -%59 = OpLoad %3 %57 -%60 = OpExtInst %3 %1 UMin %59 %11 -%61 = OpAccessChain %58 %16 %62 -%63 = OpLoad %3 %61 -%64 = OpExtInst %3 %1 UMin %63 %11 -%65 = OpAccessChain %66 %16 %67 -%68 = OpAccessChain %69 %16 %11 -OpControlBarrier %23 %23 %24 -OpSetMeshOutputsEXT %60 %64 -OpStore %32 %31 -OpBranch %70 -%70 = OpLabel -OpLoopMerge %72 %81 None +%35 = OpFunction %2 None %36 +%37 = OpLabel +%38 = OpFunctionCall %9 %19 +OpControlBarrier %32 %32 %33 +%45 = OpCompositeExtract %3 %38 0 +%42 = OpUConvert %40 %45 +%46 = OpCompositeExtract %3 %38 1 +%43 = OpUConvert %40 %46 +%47 = OpCompositeExtract %3 %38 2 +%44 = OpUConvert %40 %47 +%48 = OpIMul %40 %42 %43 +%49 = OpIMul %40 %48 %44 +%50 = OpUGreaterThanEqual %29 %49 %41 +%51 = OpUGreaterThanEqual %29 %42 %39 +%52 = OpUGreaterThanEqual %29 %43 %39 +%53 = OpUGreaterThanEqual %29 %44 %39 +%54 = OpLogicalOr %29 %50 %51 +%55 = OpLogicalOr %29 %54 %52 +%56 = OpLogicalOr %29 %55 %53 +%57 = OpCompositeConstruct %9 %27 %27 %27 +%58 = OpSelect %9 %56 %57 %38 +%59 = OpCompositeExtract %3 %58 0 +%60 = OpCompositeExtract %3 %58 1 +%61 = OpCompositeExtract %3 %58 2 +OpEmitMeshTasksEXT %59 %60 %61 %14 +OpFunctionEnd +%72 = OpFunction %2 None %36 +%62 = OpLabel +OpBranch %73 +%73 = OpLabel +%76 = OpLoad %3 %75 +%77 = OpIEqual %29 %76 %27 +OpSelectionMerge %78 None +OpBranchConditional %77 %79 %78 +%79 = OpLabel +OpStore %16 %74 +OpBranch %78 +%78 = OpLabel +OpControlBarrier %32 %32 %33 OpBranch %80 %80 = OpLabel -%83 = OpLoad %3 %32 -%84 = OpULessThan %51 %83 %60 -OpBranchConditional %84 %82 %72 +OpReturn +OpFunctionEnd +%81 = OpFunction %2 None %36 %82 = OpLabel -%74 = OpLoad %3 %32 -%75 = OpAccessChain %76 %65 %74 %67 -%77 = OpLoad %5 %75 -%78 = OpAccessChain %79 %39 %74 %67 -OpStore %78 %77 -OpBranch %81 -%81 = OpLabel -%85 = OpLoad %3 %32 -%86 = OpIAdd %3 %85 %11 -OpStore %32 %86 -OpBranch %70 -%72 = OpLabel -OpStore %33 %31 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %93 None -OpBranch %92 -%92 = OpLabel -%95 = OpLoad %3 %33 -%96 = OpULessThan %51 %95 %64 -OpBranchConditional %96 %94 %73 -%94 = OpLabel -%87 = OpLoad %3 %33 -%88 = OpAccessChain %58 %68 %87 %67 -%89 = OpLoad %3 %88 -%90 = OpAccessChain %91 %42 %87 -OpStore %90 %89 -OpBranch %93 -%93 = OpLabel -%97 = OpLoad %3 %33 -%98 = OpIAdd %3 %97 %11 -OpStore %33 %98 -OpBranch %71 -%73 = OpLabel +%83 = OpVariable %85 Function +%84 = OpVariable %85 Function +%86 = OpLoad %3 %63 +%87 = OpFunctionCall %2 %72 +OpControlBarrier %32 %32 %33 +%88 = OpAccessChain %89 %16 %32 +%90 = OpLoad %3 %88 +%91 = OpExtInst %3 %1 UMin %90 %11 +%92 = OpAccessChain %89 %16 %93 +%94 = OpLoad %3 %92 +%95 = OpExtInst %3 %1 UMin %94 %11 +%96 = OpAccessChain %97 %16 %27 +%98 = OpAccessChain %99 %16 %11 +OpSetMeshOutputsEXT %91 %95 +OpStore %83 %86 +OpBranch %100 +%100 = OpLabel +OpLoopMerge %102 %111 None +OpBranch %110 +%110 = OpLabel +%113 = OpLoad %3 %83 +%114 = OpULessThan %29 %113 %91 +OpBranchConditional %114 %112 %102 +%112 = OpLabel +%104 = OpLoad %3 %83 +%105 = OpAccessChain %106 %96 %104 %27 +%107 = OpLoad %5 %105 +%108 = OpAccessChain %109 %68 %104 %27 +OpStore %108 %107 +OpBranch %111 +%111 = OpLabel +%115 = OpLoad %3 %83 +%116 = OpIAdd %3 %115 %64 +OpStore %83 %116 +OpBranch %100 +%102 = OpLabel +OpStore %84 %86 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %123 None +OpBranch %122 +%122 = OpLabel +%125 = OpLoad %3 %84 +%126 = OpULessThan %29 %125 %95 +OpBranchConditional %126 %124 %103 +%124 = OpLabel +%117 = OpLoad %3 %84 +%118 = OpAccessChain %89 %98 %117 %27 +%119 = OpLoad %3 %118 +%120 = OpAccessChain %121 %71 %117 +OpStore %120 %119 +OpBranch %123 +%123 = OpLabel +%127 = OpLoad %3 %84 +%128 = OpIAdd %3 %127 %64 +OpStore %84 %128 +OpBranch %101 +%103 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader.spvasm b/naga/tests/out/spv/wgsl-mesh-shader.spvasm index 4fe27cf360..973812153f 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader.spvasm @@ -1,65 +1,67 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 384 +; Bound: 449 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %24 "ts_main" %17 %19 %33 -OpEntryPoint TaskEXT %57 "ts_divergent" %54 %17 -OpEntryPoint MeshEXT %95 "ms_main" %17 %72 %81 %85 %88 %91 %94 %19 %21 %107 -OpEntryPoint MeshEXT %214 "ms_no_ts" %192 %200 %204 %207 %210 %213 %19 %21 %217 -OpEntryPoint MeshEXT %301 "ms_divergent" %279 %287 %291 %294 %297 %300 %19 %21 %303 -OpEntryPoint Fragment %379 "fs_main" %370 %373 %376 %378 -OpExecutionMode %24 LocalSize 1 1 1 -OpExecutionMode %57 LocalSize 2 1 1 -OpExecutionMode %95 LocalSize 1 1 1 -OpExecutionMode %95 OutputTrianglesEXT -OpExecutionMode %95 OutputVertices 3 -OpExecutionMode %95 OutputPrimitivesEXT 1 -OpExecutionMode %214 LocalSize 1 1 1 -OpExecutionMode %214 OutputTrianglesEXT -OpExecutionMode %214 OutputVertices 3 -OpExecutionMode %214 OutputPrimitivesEXT 1 -OpExecutionMode %301 LocalSize 1 1 1 -OpExecutionMode %301 OutputTrianglesEXT -OpExecutionMode %301 OutputVertices 3 -OpExecutionMode %301 OutputPrimitivesEXT 1 -OpExecutionMode %379 OriginUpperLeft -OpDecorate %72 BuiltIn LocalInvocationIndex -OpMemberDecorate %78 0 BuiltIn Position -OpDecorate %78 Block -OpMemberDecorate %82 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %82 0 PerPrimitiveEXT -OpDecorate %82 Block -OpDecorate %85 PerPrimitiveEXT -OpDecorate %88 Location 0 -OpDecorate %91 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %94 Location 1 -OpDecorate %94 PerPrimitiveEXT -OpDecorate %192 BuiltIn LocalInvocationIndex -OpMemberDecorate %197 0 BuiltIn Position -OpDecorate %197 Block -OpMemberDecorate %201 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %201 0 PerPrimitiveEXT -OpDecorate %201 Block -OpDecorate %204 PerPrimitiveEXT -OpDecorate %207 Location 0 -OpDecorate %210 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %213 Location 1 -OpDecorate %213 PerPrimitiveEXT -OpMemberDecorate %284 0 BuiltIn Position -OpDecorate %284 Block -OpMemberDecorate %288 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %288 0 PerPrimitiveEXT -OpDecorate %288 Block -OpDecorate %291 PerPrimitiveEXT -OpDecorate %294 Location 0 -OpDecorate %297 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %300 Location 1 -OpDecorate %300 PerPrimitiveEXT +OpEntryPoint TaskEXT %62 "ts_main" %17 %19 %47 +OpEntryPoint TaskEXT %108 "ts_divergent" %90 %17 %96 +OpEntryPoint MeshEXT %198 "ms_main" %17 %132 %136 %140 %143 %146 %149 %19 %21 %162 +OpEntryPoint MeshEXT %290 "ms_no_ts" %252 %256 %260 %263 %266 %269 %19 %21 %273 +OpEntryPoint MeshEXT %384 "ms_divergent" %340 %342 %346 %350 %353 %356 %359 %19 %21 %362 +OpEntryPoint Fragment %444 "fs_main" %435 %438 %441 %443 +OpExecutionMode %62 LocalSize 1 1 1 +OpExecutionMode %108 LocalSize 2 1 1 +OpExecutionMode %198 LocalSize 1 1 1 +OpExecutionMode %198 OutputTrianglesEXT +OpExecutionMode %198 OutputVertices 3 +OpExecutionMode %198 OutputPrimitivesEXT 1 +OpExecutionMode %290 LocalSize 1 1 1 +OpExecutionMode %290 OutputTrianglesEXT +OpExecutionMode %290 OutputVertices 3 +OpExecutionMode %290 OutputPrimitivesEXT 1 +OpExecutionMode %384 LocalSize 2 1 1 +OpExecutionMode %384 OutputTrianglesEXT +OpExecutionMode %384 OutputVertices 3 +OpExecutionMode %384 OutputPrimitivesEXT 1 +OpExecutionMode %444 OriginUpperLeft +OpDecorate %132 BuiltIn LocalInvocationIndex +OpMemberDecorate %133 0 BuiltIn Position +OpDecorate %133 Block +OpMemberDecorate %137 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %137 0 PerPrimitiveEXT +OpDecorate %137 Block +OpDecorate %140 PerPrimitiveEXT +OpDecorate %143 Location 0 +OpDecorate %146 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %149 Location 1 +OpDecorate %149 PerPrimitiveEXT +OpDecorate %252 BuiltIn LocalInvocationIndex +OpMemberDecorate %253 0 BuiltIn Position +OpDecorate %253 Block +OpMemberDecorate %257 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %257 0 PerPrimitiveEXT +OpDecorate %257 Block +OpDecorate %260 PerPrimitiveEXT +OpDecorate %263 Location 0 +OpDecorate %266 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %269 Location 1 +OpDecorate %269 PerPrimitiveEXT +OpDecorate %342 BuiltIn LocalInvocationIndex +OpMemberDecorate %343 0 BuiltIn Position +OpDecorate %343 Block +OpMemberDecorate %347 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %347 0 PerPrimitiveEXT +OpDecorate %347 Block +OpDecorate %350 PerPrimitiveEXT +OpDecorate %353 Location 0 +OpDecorate %356 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %359 Location 1 +OpDecorate %359 PerPrimitiveEXT OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 16 OpMemberDecorate %7 0 Offset 0 @@ -74,17 +76,18 @@ OpMemberDecorate %16 0 Offset 0 OpMemberDecorate %16 1 Offset 96 OpMemberDecorate %16 2 Offset 128 OpMemberDecorate %16 3 Offset 132 -OpDecorate %33 BuiltIn LocalInvocationId -OpDecorate %54 BuiltIn LocalInvocationIndex -OpDecorate %107 BuiltIn LocalInvocationId -OpDecorate %217 BuiltIn LocalInvocationId -OpDecorate %279 BuiltIn LocalInvocationIndex -OpDecorate %303 BuiltIn LocalInvocationId -OpDecorate %370 BuiltIn FragCoord -OpDecorate %373 Location 0 -OpDecorate %376 Location 1 -OpDecorate %376 PerPrimitiveEXT -OpDecorate %378 Location 0 +OpDecorate %47 BuiltIn LocalInvocationIndex +OpDecorate %90 BuiltIn LocalInvocationId +OpDecorate %96 BuiltIn LocalInvocationIndex +OpDecorate %162 BuiltIn LocalInvocationIndex +OpDecorate %273 BuiltIn LocalInvocationIndex +OpDecorate %340 BuiltIn LocalInvocationId +OpDecorate %362 BuiltIn LocalInvocationIndex +OpDecorate %435 BuiltIn FragCoord +OpDecorate %438 Location 0 +OpDecorate %441 Location 1 +OpDecorate %441 PerPrimitiveEXT +OpDecorate %443 Location 0 %2 = OpTypeVoid %3 = OpTypeFloat 32 %4 = OpTypeVector %3 4 @@ -106,536 +109,618 @@ OpDecorate %378 Location 0 %19 = OpVariable %20 Workgroup %22 = OpTypePointer Workgroup %16 %21 = OpVariable %22 Workgroup -%25 = OpTypeFunction %2 -%26 = OpConstant %3 1 -%27 = OpConstant %3 0 -%28 = OpConstantComposite %4 %26 %26 %27 %26 -%29 = OpConstantTrue %5 -%30 = OpConstantComposite %9 %15 %15 %15 -%32 = OpConstantNull %3 -%34 = OpTypePointer Input %9 -%33 = OpVariable %34 Input -%36 = OpConstantNull %9 -%37 = OpTypeVector %5 3 -%42 = OpConstant %8 2 -%43 = OpConstant %8 264 -%45 = OpTypePointer TaskPayloadWorkgroupEXT %4 -%46 = OpConstant %8 0 -%48 = OpTypePointer TaskPayloadWorkgroupEXT %5 -%55 = OpTypePointer Input %8 -%54 = OpVariable %55 Input -%58 = OpConstantComposite %9 %42 %42 %42 -%72 = OpVariable %55 Input -%76 = OpTypePointer Function %8 -%78 = OpTypeStruct %4 -%79 = OpTypeArray %78 %13 -%80 = OpTypePointer Output %79 -%81 = OpVariable %80 Output -%82 = OpTypeStruct %5 -%83 = OpTypeArray %82 %15 -%84 = OpTypePointer Output %83 -%85 = OpVariable %84 Output -%86 = OpTypeArray %4 %13 -%87 = OpTypePointer Output %86 -%88 = OpVariable %87 Output -%89 = OpTypeArray %9 %15 -%90 = OpTypePointer Output %89 -%91 = OpVariable %90 Output -%92 = OpTypeArray %4 %15 -%93 = OpTypePointer Output %92 -%94 = OpVariable %93 Output -%96 = OpConstant %3 2 -%97 = OpConstantComposite %4 %27 %26 %27 %26 -%98 = OpConstant %3 -1 -%99 = OpConstantComposite %4 %98 %98 %27 %26 -%100 = OpConstantComposite %4 %27 %27 %26 %26 -%101 = OpConstantComposite %4 %26 %98 %27 %26 -%102 = OpConstantComposite %4 %26 %27 %27 %26 -%103 = OpConstantComposite %9 %46 %15 %42 -%104 = OpConstantComposite %4 %26 %27 %26 %26 -%106 = OpConstantNull %16 -%107 = OpVariable %34 Input -%114 = OpTypePointer Workgroup %8 -%117 = OpTypePointer Workgroup %12 -%118 = OpTypePointer Workgroup %7 -%119 = OpTypePointer Workgroup %4 -%135 = OpTypePointer Workgroup %14 -%136 = OpTypePointer Workgroup %10 -%137 = OpTypePointer Workgroup %9 -%139 = OpTypePointer Workgroup %5 -%161 = OpTypePointer Output %4 -%176 = OpTypePointer Output %9 -%180 = OpTypePointer Output %5 -%192 = OpVariable %55 Input -%197 = OpTypeStruct %4 -%198 = OpTypeArray %197 %13 -%199 = OpTypePointer Output %198 -%200 = OpVariable %199 Output -%201 = OpTypeStruct %5 -%202 = OpTypeArray %201 %15 -%203 = OpTypePointer Output %202 -%204 = OpVariable %203 Output -%205 = OpTypeArray %4 %13 -%206 = OpTypePointer Output %205 -%207 = OpVariable %206 Output -%208 = OpTypeArray %9 %15 -%209 = OpTypePointer Output %208 -%210 = OpVariable %209 Output -%211 = OpTypeArray %4 %15 -%212 = OpTypePointer Output %211 -%213 = OpVariable %212 Output -%215 = OpConstantFalse %5 -%217 = OpVariable %34 Input -%279 = OpVariable %55 Input -%284 = OpTypeStruct %4 -%285 = OpTypeArray %284 %13 -%286 = OpTypePointer Output %285 -%287 = OpVariable %286 Output -%288 = OpTypeStruct %5 -%289 = OpTypeArray %288 %15 -%290 = OpTypePointer Output %289 -%291 = OpVariable %290 Output -%292 = OpTypeArray %4 %13 -%293 = OpTypePointer Output %292 -%294 = OpVariable %293 Output -%295 = OpTypeArray %9 %15 -%296 = OpTypePointer Output %295 -%297 = OpVariable %296 Output -%298 = OpTypeArray %4 %15 -%299 = OpTypePointer Output %298 -%300 = OpVariable %299 Output -%303 = OpVariable %34 Input -%371 = OpTypePointer Input %4 -%370 = OpVariable %371 Input -%373 = OpVariable %371 Input -%376 = OpVariable %371 Input -%378 = OpVariable %161 Output -%24 = OpFunction %2 None %25 +%25 = OpTypeFunction %5 +%27 = OpTypePointer TaskPayloadWorkgroupEXT %5 +%33 = OpTypeFunction %2 %5 +%38 = OpTypeFunction %9 +%39 = OpConstant %3 1 +%40 = OpConstant %3 0 +%41 = OpConstantComposite %4 %39 %39 %40 %39 +%42 = OpConstantTrue %5 +%43 = OpConstantComposite %9 %15 %15 %15 +%45 = OpConstantNull %6 +%46 = OpConstantNull %3 +%48 = OpTypePointer Input %8 +%47 = OpVariable %48 Input +%50 = OpConstant %8 0 +%54 = OpConstant %8 2 +%55 = OpConstant %8 264 +%57 = OpTypePointer TaskPayloadWorkgroupEXT %4 +%63 = OpTypeFunction %2 +%67 = OpTypeInt 64 0 +%66 = OpConstant %67 256 +%68 = OpConstant %67 1024 +%91 = OpTypePointer Input %9 +%90 = OpVariable %91 Input +%94 = OpConstantComposite %9 %54 %54 %54 +%96 = OpVariable %48 Input +%132 = OpVariable %48 Input +%133 = OpTypeStruct %4 +%134 = OpTypeArray %133 %13 +%135 = OpTypePointer Output %134 +%136 = OpVariable %135 Output +%137 = OpTypeStruct %5 +%138 = OpTypeArray %137 %15 +%139 = OpTypePointer Output %138 +%140 = OpVariable %139 Output +%141 = OpTypeArray %4 %13 +%142 = OpTypePointer Output %141 +%143 = OpVariable %142 Output +%144 = OpTypeArray %9 %15 +%145 = OpTypePointer Output %144 +%146 = OpVariable %145 Output +%147 = OpTypeArray %4 %15 +%148 = OpTypePointer Output %147 +%149 = OpVariable %148 Output +%151 = OpConstant %3 2 +%152 = OpConstantComposite %4 %40 %39 %40 %39 +%153 = OpConstant %3 -1 +%154 = OpConstantComposite %4 %153 %153 %40 %39 +%155 = OpConstantComposite %4 %40 %40 %39 %39 +%156 = OpConstantComposite %4 %39 %153 %40 %39 +%157 = OpConstantComposite %4 %39 %40 %40 %39 +%158 = OpConstantComposite %9 %50 %15 %54 +%159 = OpConstantComposite %4 %39 %40 %39 %39 +%161 = OpConstantNull %16 +%162 = OpVariable %48 Input +%168 = OpTypePointer Workgroup %8 +%171 = OpTypePointer Workgroup %12 +%172 = OpTypePointer Workgroup %7 +%173 = OpTypePointer Workgroup %4 +%189 = OpTypePointer Workgroup %14 +%190 = OpTypePointer Workgroup %10 +%191 = OpTypePointer Workgroup %9 +%193 = OpTypePointer Workgroup %5 +%202 = OpTypePointer Function %8 +%221 = OpTypePointer Output %4 +%236 = OpTypePointer Output %9 +%240 = OpTypePointer Output %5 +%252 = OpVariable %48 Input +%253 = OpTypeStruct %4 +%254 = OpTypeArray %253 %13 +%255 = OpTypePointer Output %254 +%256 = OpVariable %255 Output +%257 = OpTypeStruct %5 +%258 = OpTypeArray %257 %15 +%259 = OpTypePointer Output %258 +%260 = OpVariable %259 Output +%261 = OpTypeArray %4 %13 +%262 = OpTypePointer Output %261 +%263 = OpVariable %262 Output +%264 = OpTypeArray %9 %15 +%265 = OpTypePointer Output %264 +%266 = OpVariable %265 Output +%267 = OpTypeArray %4 %15 +%268 = OpTypePointer Output %267 +%269 = OpVariable %268 Output +%271 = OpConstantFalse %5 +%273 = OpVariable %48 Input +%340 = OpVariable %91 Input +%342 = OpVariable %48 Input +%343 = OpTypeStruct %4 +%344 = OpTypeArray %343 %13 +%345 = OpTypePointer Output %344 +%346 = OpVariable %345 Output +%347 = OpTypeStruct %5 +%348 = OpTypeArray %347 %15 +%349 = OpTypePointer Output %348 +%350 = OpVariable %349 Output +%351 = OpTypeArray %4 %13 +%352 = OpTypePointer Output %351 +%353 = OpVariable %352 Output +%354 = OpTypeArray %9 %15 +%355 = OpTypePointer Output %354 +%356 = OpVariable %355 Output +%357 = OpTypeArray %4 %15 +%358 = OpTypePointer Output %357 +%359 = OpVariable %358 Output +%362 = OpVariable %48 Input +%436 = OpTypePointer Input %4 +%435 = OpVariable %436 Input +%438 = OpVariable %436 Input +%441 = OpVariable %436 Input +%443 = OpVariable %221 Output +%24 = OpFunction %5 None %25 %23 = OpLabel -OpBranch %31 -%31 = OpLabel -%35 = OpLoad %9 %33 -%38 = OpIEqual %37 %35 %36 -%39 = OpAll %5 %38 -OpSelectionMerge %40 None -OpBranchConditional %39 %41 %40 -%41 = OpLabel -OpStore %19 %32 -OpBranch %40 -%40 = OpLabel -OpControlBarrier %42 %42 %43 +OpBranch %26 +%26 = OpLabel +%28 = OpAccessChain %27 %17 %15 +%29 = OpLoad %5 %28 +OpReturnValue %29 +OpFunctionEnd +%32 = OpFunction %2 None %33 +%31 = OpFunctionParameter %5 +%30 = OpLabel +OpBranch %34 +%34 = OpLabel +%35 = OpAccessChain %27 %17 %15 +OpStore %35 %31 +OpReturn +OpFunctionEnd +%37 = OpFunction %9 None %38 +%36 = OpLabel OpBranch %44 %44 = OpLabel -OpStore %19 %26 -%47 = OpAccessChain %45 %17 %46 -OpStore %47 %28 -%49 = OpAccessChain %48 %17 %15 -OpStore %49 %29 -OpControlBarrier %42 %42 %43 -%50 = OpCompositeExtract %8 %30 0 -%51 = OpCompositeExtract %8 %30 1 -%52 = OpCompositeExtract %8 %30 2 -OpEmitMeshTasksEXT %50 %51 %52 %17 -OpFunctionEnd -%57 = OpFunction %2 None %25 +%49 = OpLoad %8 %47 +%51 = OpIEqual %5 %49 %50 +OpSelectionMerge %52 None +OpBranchConditional %51 %53 %52 %53 = OpLabel -%56 = OpLoad %8 %54 -OpBranch %59 -%59 = OpLabel -%60 = OpIEqual %5 %56 %46 -OpSelectionMerge %61 None -OpBranchConditional %60 %62 %61 -%62 = OpLabel -%63 = OpAccessChain %45 %17 %46 -OpStore %63 %28 -%64 = OpAccessChain %48 %17 %15 -OpStore %64 %29 -OpControlBarrier %42 %42 %43 -%65 = OpCompositeExtract %8 %30 0 -%66 = OpCompositeExtract %8 %30 1 -%67 = OpCompositeExtract %8 %30 2 -OpEmitMeshTasksEXT %65 %66 %67 %17 -%61 = OpLabel -OpControlBarrier %42 %42 %43 -%68 = OpCompositeExtract %8 %58 0 -%69 = OpCompositeExtract %8 %58 1 -%70 = OpCompositeExtract %8 %58 2 -OpEmitMeshTasksEXT %68 %69 %70 %17 +OpStore %17 %45 +OpStore %19 %46 +OpBranch %52 +%52 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %56 +%56 = OpLabel +OpStore %19 %39 +%58 = OpAccessChain %57 %17 %50 +OpStore %58 %41 +%59 = OpFunctionCall %2 %32 %42 +%60 = OpFunctionCall %5 %24 +%61 = OpAccessChain %27 %17 %15 +OpStore %61 %60 +OpReturnValue %43 OpFunctionEnd -%95 = OpFunction %2 None %25 -%71 = OpLabel -%74 = OpVariable %76 Function -%75 = OpVariable %76 Function -%73 = OpLoad %8 %72 -OpBranch %105 +%62 = OpFunction %2 None %63 +%64 = OpLabel +%65 = OpFunctionCall %9 %37 +OpControlBarrier %54 %54 %55 +%72 = OpCompositeExtract %8 %65 0 +%69 = OpUConvert %67 %72 +%73 = OpCompositeExtract %8 %65 1 +%70 = OpUConvert %67 %73 +%74 = OpCompositeExtract %8 %65 2 +%71 = OpUConvert %67 %74 +%75 = OpIMul %67 %69 %70 +%76 = OpIMul %67 %75 %71 +%77 = OpUGreaterThanEqual %5 %76 %68 +%78 = OpUGreaterThanEqual %5 %69 %66 +%79 = OpUGreaterThanEqual %5 %70 %66 +%80 = OpUGreaterThanEqual %5 %71 %66 +%81 = OpLogicalOr %5 %77 %78 +%82 = OpLogicalOr %5 %81 %79 +%83 = OpLogicalOr %5 %82 %80 +%84 = OpCompositeConstruct %9 %50 %50 %50 +%85 = OpSelect %9 %83 %84 %65 +%86 = OpCompositeExtract %8 %85 0 +%87 = OpCompositeExtract %8 %85 1 +%88 = OpCompositeExtract %8 %85 2 +OpEmitMeshTasksEXT %86 %87 %88 %17 +OpFunctionEnd +%93 = OpFunction %9 None %38 +%89 = OpLabel +%92 = OpLoad %9 %90 +OpBranch %95 +%95 = OpLabel +%97 = OpLoad %8 %96 +%98 = OpIEqual %5 %97 %50 +OpSelectionMerge %99 None +OpBranchConditional %98 %100 %99 +%100 = OpLabel +OpStore %17 %45 +OpBranch %99 +%99 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %101 +%101 = OpLabel +%102 = OpCompositeExtract %8 %92 0 +%103 = OpIEqual %5 %102 %50 +OpSelectionMerge %104 None +OpBranchConditional %103 %105 %104 %105 = OpLabel -%108 = OpLoad %9 %107 -%109 = OpIEqual %37 %108 %36 -%110 = OpAll %5 %109 -OpSelectionMerge %111 None -OpBranchConditional %110 %112 %111 -%112 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %111 -%111 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %113 -%113 = OpLabel -%115 = OpAccessChain %114 %21 %42 -OpStore %115 %13 -%116 = OpAccessChain %114 %21 %13 -OpStore %116 %15 -OpStore %19 %96 -%120 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %120 %97 -%121 = OpAccessChain %45 %17 %46 -%122 = OpLoad %4 %121 -%123 = OpFMul %4 %97 %122 -%124 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %124 %123 -%125 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %125 %99 -%126 = OpAccessChain %45 %17 %46 -%127 = OpLoad %4 %126 -%128 = OpFMul %4 %100 %127 -%129 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %129 %128 -%130 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %130 %101 -%131 = OpAccessChain %45 %17 %46 -%132 = OpLoad %4 %131 -%133 = OpFMul %4 %102 %132 -%134 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %134 %133 -%138 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %138 %103 -%140 = OpAccessChain %48 %17 %15 -%141 = OpLoad %5 %140 -%142 = OpLogicalNot %5 %141 -%143 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %143 %142 -%144 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %144 %104 -OpBranch %77 -%77 = OpLabel -OpControlBarrier %42 %42 %43 -%145 = OpAccessChain %114 %21 %42 -%146 = OpLoad %8 %145 -%147 = OpExtInst %8 %1 UMin %146 %13 -%148 = OpAccessChain %114 %21 %13 -%149 = OpLoad %8 %148 -%150 = OpExtInst %8 %1 UMin %149 %15 -%151 = OpAccessChain %117 %21 %46 -%152 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %147 %150 -OpStore %74 %73 -OpBranch %153 -%153 = OpLabel -OpLoopMerge %155 %166 None +%106 = OpAccessChain %57 %17 %50 +OpStore %106 %41 +%107 = OpAccessChain %27 %17 %15 +OpStore %107 %42 +OpReturnValue %43 +%104 = OpLabel +OpReturnValue %94 +OpFunctionEnd +%108 = OpFunction %2 None %63 +%109 = OpLabel +%110 = OpFunctionCall %9 %93 +OpControlBarrier %54 %54 %55 +%114 = OpCompositeExtract %8 %110 0 +%111 = OpUConvert %67 %114 +%115 = OpCompositeExtract %8 %110 1 +%112 = OpUConvert %67 %115 +%116 = OpCompositeExtract %8 %110 2 +%113 = OpUConvert %67 %116 +%117 = OpIMul %67 %111 %112 +%118 = OpIMul %67 %117 %113 +%119 = OpUGreaterThanEqual %5 %118 %68 +%120 = OpUGreaterThanEqual %5 %111 %66 +%121 = OpUGreaterThanEqual %5 %112 %66 +%122 = OpUGreaterThanEqual %5 %113 %66 +%123 = OpLogicalOr %5 %119 %120 +%124 = OpLogicalOr %5 %123 %121 +%125 = OpLogicalOr %5 %124 %122 +%126 = OpCompositeConstruct %9 %50 %50 %50 +%127 = OpSelect %9 %125 %126 %110 +%128 = OpCompositeExtract %8 %127 0 +%129 = OpCompositeExtract %8 %127 1 +%130 = OpCompositeExtract %8 %127 2 +OpEmitMeshTasksEXT %128 %129 %130 %17 +OpFunctionEnd +%150 = OpFunction %2 None %63 +%131 = OpLabel +OpBranch %160 +%160 = OpLabel +%163 = OpLoad %8 %162 +%164 = OpIEqual %5 %163 %50 +OpSelectionMerge %165 None +OpBranchConditional %164 %166 %165 +%166 = OpLabel +OpStore %19 %46 +OpStore %21 %161 OpBranch %165 %165 = OpLabel -%168 = OpLoad %8 %74 -%169 = OpULessThan %5 %168 %147 -OpBranchConditional %169 %167 %155 +OpControlBarrier %54 %54 %55 +OpBranch %167 %167 = OpLabel -%157 = OpLoad %8 %74 -%158 = OpAccessChain %119 %151 %157 %46 -%159 = OpLoad %4 %158 -%160 = OpAccessChain %161 %81 %157 %46 -OpStore %160 %159 -%162 = OpAccessChain %119 %151 %157 %15 -%163 = OpLoad %4 %162 -%164 = OpAccessChain %161 %88 %157 -OpStore %164 %163 -OpBranch %166 -%166 = OpLabel -%170 = OpLoad %8 %74 -%171 = OpIAdd %8 %170 %15 -OpStore %74 %171 -OpBranch %153 -%155 = OpLabel -OpStore %75 %73 -OpBranch %154 -%154 = OpLabel -OpLoopMerge %156 %185 None -OpBranch %184 -%184 = OpLabel -%187 = OpLoad %8 %75 -%188 = OpULessThan %5 %187 %150 -OpBranchConditional %188 %186 %156 -%186 = OpLabel -%172 = OpLoad %8 %75 -%173 = OpAccessChain %137 %152 %172 %46 -%174 = OpLoad %9 %173 -%175 = OpAccessChain %176 %91 %172 -OpStore %175 %174 -%177 = OpAccessChain %139 %152 %172 %15 -%178 = OpLoad %5 %177 -%179 = OpAccessChain %180 %85 %172 %46 -OpStore %179 %178 -%181 = OpAccessChain %119 %152 %172 %42 -%182 = OpLoad %4 %181 -%183 = OpAccessChain %161 %94 %172 +%169 = OpAccessChain %168 %21 %54 +OpStore %169 %13 +%170 = OpAccessChain %168 %21 %13 +OpStore %170 %15 +OpStore %19 %151 +%174 = OpAccessChain %173 %21 %50 %50 %50 +OpStore %174 %152 +%175 = OpAccessChain %57 %17 %50 +%176 = OpLoad %4 %175 +%177 = OpFMul %4 %152 %176 +%178 = OpAccessChain %173 %21 %50 %50 %15 +OpStore %178 %177 +%179 = OpAccessChain %173 %21 %50 %15 %50 +OpStore %179 %154 +%180 = OpAccessChain %57 %17 %50 +%181 = OpLoad %4 %180 +%182 = OpFMul %4 %155 %181 +%183 = OpAccessChain %173 %21 %50 %15 %15 OpStore %183 %182 -OpBranch %185 -%185 = OpLabel -%189 = OpLoad %8 %75 -%190 = OpIAdd %8 %189 %15 -OpStore %75 %190 -OpBranch %154 -%156 = OpLabel +%184 = OpAccessChain %173 %21 %50 %54 %50 +OpStore %184 %156 +%185 = OpAccessChain %57 %17 %50 +%186 = OpLoad %4 %185 +%187 = OpFMul %4 %157 %186 +%188 = OpAccessChain %173 %21 %50 %54 %15 +OpStore %188 %187 +%192 = OpAccessChain %191 %21 %15 %50 %50 +OpStore %192 %158 +%194 = OpFunctionCall %5 %24 +%195 = OpLogicalNot %5 %194 +%196 = OpAccessChain %193 %21 %15 %50 %15 +OpStore %196 %195 +%197 = OpAccessChain %173 %21 %15 %50 %54 +OpStore %197 %159 OpReturn OpFunctionEnd -%214 = OpFunction %2 None %25 -%191 = OpLabel -%194 = OpVariable %76 Function -%195 = OpVariable %76 Function -%193 = OpLoad %8 %192 -OpBranch %216 -%216 = OpLabel -%218 = OpLoad %9 %217 -%219 = OpIEqual %37 %218 %36 -%220 = OpAll %5 %219 -OpSelectionMerge %221 None -OpBranchConditional %220 %222 %221 -%222 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %221 -%221 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %223 -%223 = OpLabel -%224 = OpAccessChain %114 %21 %42 -OpStore %224 %13 -%225 = OpAccessChain %114 %21 %13 -OpStore %225 %15 -OpStore %19 %96 -%226 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %226 %97 -%227 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %227 %97 -%228 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %228 %99 -%229 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %229 %100 -%230 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %230 %101 -%231 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %231 %102 -%232 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %232 %103 -%233 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %233 %215 -%234 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %234 %104 -OpBranch %196 -%196 = OpLabel -OpControlBarrier %42 %42 %43 -%235 = OpAccessChain %114 %21 %42 -%236 = OpLoad %8 %235 -%237 = OpExtInst %8 %1 UMin %236 %13 -%238 = OpAccessChain %114 %21 %13 -%239 = OpLoad %8 %238 -%240 = OpExtInst %8 %1 UMin %239 %15 -%241 = OpAccessChain %117 %21 %46 -%242 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %237 %240 -OpStore %194 %193 -OpBranch %243 -%243 = OpLabel -OpLoopMerge %245 %255 None -OpBranch %254 -%254 = OpLabel -%257 = OpLoad %8 %194 -%258 = OpULessThan %5 %257 %237 -OpBranchConditional %258 %256 %245 -%256 = OpLabel -%247 = OpLoad %8 %194 -%248 = OpAccessChain %119 %241 %247 %46 -%249 = OpLoad %4 %248 -%250 = OpAccessChain %161 %200 %247 %46 -OpStore %250 %249 -%251 = OpAccessChain %119 %241 %247 %15 -%252 = OpLoad %4 %251 -%253 = OpAccessChain %161 %207 %247 -OpStore %253 %252 -OpBranch %255 -%255 = OpLabel -%259 = OpLoad %8 %194 -%260 = OpIAdd %8 %259 %15 -OpStore %194 %260 -OpBranch %243 -%245 = OpLabel -OpStore %195 %193 +%198 = OpFunction %2 None %63 +%199 = OpLabel +%200 = OpVariable %202 Function +%201 = OpVariable %202 Function +%203 = OpLoad %8 %132 +%204 = OpFunctionCall %2 %150 +OpControlBarrier %54 %54 %55 +%205 = OpAccessChain %168 %21 %54 +%206 = OpLoad %8 %205 +%207 = OpExtInst %8 %1 UMin %206 %13 +%208 = OpAccessChain %168 %21 %13 +%209 = OpLoad %8 %208 +%210 = OpExtInst %8 %1 UMin %209 %15 +%211 = OpAccessChain %171 %21 %50 +%212 = OpAccessChain %189 %21 %15 +OpSetMeshOutputsEXT %207 %210 +OpStore %200 %203 +OpBranch %213 +%213 = OpLabel +OpLoopMerge %215 %226 None +OpBranch %225 +%225 = OpLabel +%228 = OpLoad %8 %200 +%229 = OpULessThan %5 %228 %207 +OpBranchConditional %229 %227 %215 +%227 = OpLabel +%217 = OpLoad %8 %200 +%218 = OpAccessChain %173 %211 %217 %50 +%219 = OpLoad %4 %218 +%220 = OpAccessChain %221 %136 %217 %50 +OpStore %220 %219 +%222 = OpAccessChain %173 %211 %217 %15 +%223 = OpLoad %4 %222 +%224 = OpAccessChain %221 %143 %217 +OpStore %224 %223 +OpBranch %226 +%226 = OpLabel +%230 = OpLoad %8 %200 +%231 = OpIAdd %8 %230 %15 +OpStore %200 %231 +OpBranch %213 +%215 = OpLabel +OpStore %201 %203 +OpBranch %214 +%214 = OpLabel +OpLoopMerge %216 %245 None OpBranch %244 %244 = OpLabel -OpLoopMerge %246 %272 None -OpBranch %271 -%271 = OpLabel -%274 = OpLoad %8 %195 -%275 = OpULessThan %5 %274 %240 -OpBranchConditional %275 %273 %246 -%273 = OpLabel -%261 = OpLoad %8 %195 -%262 = OpAccessChain %137 %242 %261 %46 -%263 = OpLoad %9 %262 -%264 = OpAccessChain %176 %210 %261 -OpStore %264 %263 -%265 = OpAccessChain %139 %242 %261 %15 -%266 = OpLoad %5 %265 -%267 = OpAccessChain %180 %204 %261 %46 -OpStore %267 %266 -%268 = OpAccessChain %119 %242 %261 %42 -%269 = OpLoad %4 %268 -%270 = OpAccessChain %161 %213 %261 -OpStore %270 %269 -OpBranch %272 -%272 = OpLabel -%276 = OpLoad %8 %195 -%277 = OpIAdd %8 %276 %15 -OpStore %195 %277 -OpBranch %244 +%247 = OpLoad %8 %201 +%248 = OpULessThan %5 %247 %210 +OpBranchConditional %248 %246 %216 %246 = OpLabel +%232 = OpLoad %8 %201 +%233 = OpAccessChain %191 %212 %232 %50 +%234 = OpLoad %9 %233 +%235 = OpAccessChain %236 %146 %232 +OpStore %235 %234 +%237 = OpAccessChain %193 %212 %232 %15 +%238 = OpLoad %5 %237 +%239 = OpAccessChain %240 %140 %232 %50 +OpStore %239 %238 +%241 = OpAccessChain %173 %212 %232 %54 +%242 = OpLoad %4 %241 +%243 = OpAccessChain %221 %149 %232 +OpStore %243 %242 +OpBranch %245 +%245 = OpLabel +%249 = OpLoad %8 %201 +%250 = OpIAdd %8 %249 %15 +OpStore %201 %250 +OpBranch %214 +%216 = OpLabel OpReturn OpFunctionEnd -%301 = OpFunction %2 None %25 +%270 = OpFunction %2 None %63 +%251 = OpLabel +OpBranch %272 +%272 = OpLabel +%274 = OpLoad %8 %273 +%275 = OpIEqual %5 %274 %50 +OpSelectionMerge %276 None +OpBranchConditional %275 %277 %276 +%277 = OpLabel +OpStore %19 %46 +OpStore %21 %161 +OpBranch %276 +%276 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %278 %278 = OpLabel -%281 = OpVariable %76 Function -%282 = OpVariable %76 Function -%280 = OpLoad %8 %279 -OpBranch %302 -%302 = OpLabel -%304 = OpLoad %9 %303 -%305 = OpIEqual %37 %304 %36 -%306 = OpAll %5 %305 -OpSelectionMerge %307 None -OpBranchConditional %306 %308 %307 -%308 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %307 -%307 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %309 -%309 = OpLabel -%310 = OpIEqual %5 %280 %46 -OpSelectionMerge %311 None -OpBranchConditional %310 %312 %313 -%312 = OpLabel -%314 = OpAccessChain %114 %21 %42 -OpStore %314 %13 -%315 = OpAccessChain %114 %21 %13 -OpStore %315 %15 -OpStore %19 %96 -%316 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %316 %97 -%317 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %317 %97 -%318 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %318 %99 -%319 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %319 %100 -%320 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %320 %101 -%321 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %321 %102 -%322 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %322 %103 -%323 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %323 %215 -%324 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %324 %104 -OpBranch %283 -%313 = OpLabel -OpBranch %283 -%311 = OpLabel +%279 = OpAccessChain %168 %21 %54 +OpStore %279 %13 +%280 = OpAccessChain %168 %21 %13 +OpStore %280 %15 +OpStore %19 %151 +%281 = OpAccessChain %173 %21 %50 %50 %50 +OpStore %281 %152 +%282 = OpAccessChain %173 %21 %50 %50 %15 +OpStore %282 %152 +%283 = OpAccessChain %173 %21 %50 %15 %50 +OpStore %283 %154 +%284 = OpAccessChain %173 %21 %50 %15 %15 +OpStore %284 %155 +%285 = OpAccessChain %173 %21 %50 %54 %50 +OpStore %285 %156 +%286 = OpAccessChain %173 %21 %50 %54 %15 +OpStore %286 %157 +%287 = OpAccessChain %191 %21 %15 %50 %50 +OpStore %287 %158 +%288 = OpAccessChain %193 %21 %15 %50 %15 +OpStore %288 %271 +%289 = OpAccessChain %173 %21 %15 %50 %54 +OpStore %289 %159 OpReturn -%283 = OpLabel -OpControlBarrier %42 %42 %43 -%325 = OpAccessChain %114 %21 %42 -%326 = OpLoad %8 %325 -%327 = OpExtInst %8 %1 UMin %326 %13 -%328 = OpAccessChain %114 %21 %13 -%329 = OpLoad %8 %328 -%330 = OpExtInst %8 %1 UMin %329 %15 -%331 = OpAccessChain %117 %21 %46 -%332 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %327 %330 -OpStore %281 %280 +OpFunctionEnd +%290 = OpFunction %2 None %63 +%291 = OpLabel +%292 = OpVariable %202 Function +%293 = OpVariable %202 Function +%294 = OpLoad %8 %252 +%295 = OpFunctionCall %2 %270 +OpControlBarrier %54 %54 %55 +%296 = OpAccessChain %168 %21 %54 +%297 = OpLoad %8 %296 +%298 = OpExtInst %8 %1 UMin %297 %13 +%299 = OpAccessChain %168 %21 %13 +%300 = OpLoad %8 %299 +%301 = OpExtInst %8 %1 UMin %300 %15 +%302 = OpAccessChain %171 %21 %50 +%303 = OpAccessChain %189 %21 %15 +OpSetMeshOutputsEXT %298 %301 +OpStore %292 %294 +OpBranch %304 +%304 = OpLabel +OpLoopMerge %306 %316 None +OpBranch %315 +%315 = OpLabel +%318 = OpLoad %8 %292 +%319 = OpULessThan %5 %318 %298 +OpBranchConditional %319 %317 %306 +%317 = OpLabel +%308 = OpLoad %8 %292 +%309 = OpAccessChain %173 %302 %308 %50 +%310 = OpLoad %4 %309 +%311 = OpAccessChain %221 %256 %308 %50 +OpStore %311 %310 +%312 = OpAccessChain %173 %302 %308 %15 +%313 = OpLoad %4 %312 +%314 = OpAccessChain %221 %263 %308 +OpStore %314 %313 +OpBranch %316 +%316 = OpLabel +%320 = OpLoad %8 %292 +%321 = OpIAdd %8 %320 %15 +OpStore %292 %321 +OpBranch %304 +%306 = OpLabel +OpStore %293 %294 +OpBranch %305 +%305 = OpLabel +OpLoopMerge %307 %333 None +OpBranch %332 +%332 = OpLabel +%335 = OpLoad %8 %293 +%336 = OpULessThan %5 %335 %301 +OpBranchConditional %336 %334 %307 +%334 = OpLabel +%322 = OpLoad %8 %293 +%323 = OpAccessChain %191 %303 %322 %50 +%324 = OpLoad %9 %323 +%325 = OpAccessChain %236 %266 %322 +OpStore %325 %324 +%326 = OpAccessChain %193 %303 %322 %15 +%327 = OpLoad %5 %326 +%328 = OpAccessChain %240 %260 %322 %50 +OpStore %328 %327 +%329 = OpAccessChain %173 %303 %322 %54 +%330 = OpLoad %4 %329 +%331 = OpAccessChain %221 %269 %322 +OpStore %331 %330 OpBranch %333 %333 = OpLabel -OpLoopMerge %335 %345 None -OpBranch %344 -%344 = OpLabel -%347 = OpLoad %8 %281 -%348 = OpULessThan %5 %347 %327 -OpBranchConditional %348 %346 %335 -%346 = OpLabel -%337 = OpLoad %8 %281 -%338 = OpAccessChain %119 %331 %337 %46 -%339 = OpLoad %4 %338 -%340 = OpAccessChain %161 %287 %337 %46 -OpStore %340 %339 -%341 = OpAccessChain %119 %331 %337 %15 -%342 = OpLoad %4 %341 -%343 = OpAccessChain %161 %294 %337 -OpStore %343 %342 -OpBranch %345 -%345 = OpLabel -%349 = OpLoad %8 %281 -%350 = OpIAdd %8 %349 %15 -OpStore %281 %350 -OpBranch %333 -%335 = OpLabel -OpStore %282 %280 -OpBranch %334 -%334 = OpLabel -OpLoopMerge %336 %362 None +%337 = OpLoad %8 %293 +%338 = OpIAdd %8 %337 %15 +OpStore %293 %338 +OpBranch %305 +%307 = OpLabel +OpReturn +OpFunctionEnd +%360 = OpFunction %2 None %63 +%339 = OpLabel +%341 = OpLoad %9 %340 OpBranch %361 %361 = OpLabel -%364 = OpLoad %8 %282 -%365 = OpULessThan %5 %364 %330 -OpBranchConditional %365 %363 %336 -%363 = OpLabel -%351 = OpLoad %8 %282 -%352 = OpAccessChain %137 %332 %351 %46 -%353 = OpLoad %9 %352 -%354 = OpAccessChain %176 %297 %351 -OpStore %354 %353 -%355 = OpAccessChain %139 %332 %351 %15 -%356 = OpLoad %5 %355 -%357 = OpAccessChain %180 %291 %351 %46 -OpStore %357 %356 -%358 = OpAccessChain %119 %332 %351 %42 -%359 = OpLoad %4 %358 -%360 = OpAccessChain %161 %300 %351 -OpStore %360 %359 -OpBranch %362 -%362 = OpLabel -%366 = OpLoad %8 %282 -%367 = OpIAdd %8 %366 %15 -OpStore %282 %367 -OpBranch %334 -%336 = OpLabel +%363 = OpLoad %8 %362 +%364 = OpIEqual %5 %363 %50 +OpSelectionMerge %365 None +OpBranchConditional %364 %366 %365 +%366 = OpLabel +OpStore %19 %46 +OpStore %21 %161 +OpBranch %365 +%365 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %367 +%367 = OpLabel +%368 = OpCompositeExtract %8 %341 0 +%369 = OpIEqual %5 %368 %50 +OpSelectionMerge %370 None +OpBranchConditional %369 %371 %372 +%371 = OpLabel +%373 = OpAccessChain %168 %21 %54 +OpStore %373 %13 +%374 = OpAccessChain %168 %21 %13 +OpStore %374 %15 +OpStore %19 %151 +%375 = OpAccessChain %173 %21 %50 %50 %50 +OpStore %375 %152 +%376 = OpAccessChain %173 %21 %50 %50 %15 +OpStore %376 %152 +%377 = OpAccessChain %173 %21 %50 %15 %50 +OpStore %377 %154 +%378 = OpAccessChain %173 %21 %50 %15 %15 +OpStore %378 %155 +%379 = OpAccessChain %173 %21 %50 %54 %50 +OpStore %379 %156 +%380 = OpAccessChain %173 %21 %50 %54 %15 +OpStore %380 %157 +%381 = OpAccessChain %191 %21 %15 %50 %50 +OpStore %381 %158 +%382 = OpAccessChain %193 %21 %15 %50 %15 +OpStore %382 %271 +%383 = OpAccessChain %173 %21 %15 %50 %54 +OpStore %383 %159 +OpReturn +%372 = OpLabel +OpReturn +%370 = OpLabel +OpReturn +OpFunctionEnd +%384 = OpFunction %2 None %63 +%385 = OpLabel +%386 = OpVariable %202 Function +%387 = OpVariable %202 Function +%388 = OpLoad %8 %342 +%389 = OpFunctionCall %2 %360 +OpControlBarrier %54 %54 %55 +%390 = OpAccessChain %168 %21 %54 +%391 = OpLoad %8 %390 +%392 = OpExtInst %8 %1 UMin %391 %13 +%393 = OpAccessChain %168 %21 %13 +%394 = OpLoad %8 %393 +%395 = OpExtInst %8 %1 UMin %394 %15 +%396 = OpAccessChain %171 %21 %50 +%397 = OpAccessChain %189 %21 %15 +OpSetMeshOutputsEXT %392 %395 +OpStore %386 %388 +OpBranch %398 +%398 = OpLabel +OpLoopMerge %400 %410 None +OpBranch %409 +%409 = OpLabel +%412 = OpLoad %8 %386 +%413 = OpULessThan %5 %412 %392 +OpBranchConditional %413 %411 %400 +%411 = OpLabel +%402 = OpLoad %8 %386 +%403 = OpAccessChain %173 %396 %402 %50 +%404 = OpLoad %4 %403 +%405 = OpAccessChain %221 %346 %402 %50 +OpStore %405 %404 +%406 = OpAccessChain %173 %396 %402 %15 +%407 = OpLoad %4 %406 +%408 = OpAccessChain %221 %353 %402 +OpStore %408 %407 +OpBranch %410 +%410 = OpLabel +%414 = OpLoad %8 %386 +%415 = OpIAdd %8 %414 %54 +OpStore %386 %415 +OpBranch %398 +%400 = OpLabel +OpStore %387 %388 +OpBranch %399 +%399 = OpLabel +OpLoopMerge %401 %427 None +OpBranch %426 +%426 = OpLabel +%429 = OpLoad %8 %387 +%430 = OpULessThan %5 %429 %395 +OpBranchConditional %430 %428 %401 +%428 = OpLabel +%416 = OpLoad %8 %387 +%417 = OpAccessChain %191 %397 %416 %50 +%418 = OpLoad %9 %417 +%419 = OpAccessChain %236 %356 %416 +OpStore %419 %418 +%420 = OpAccessChain %193 %397 %416 %15 +%421 = OpLoad %5 %420 +%422 = OpAccessChain %240 %350 %416 %50 +OpStore %422 %421 +%423 = OpAccessChain %173 %397 %416 %54 +%424 = OpLoad %4 %423 +%425 = OpAccessChain %221 %359 %416 +OpStore %425 %424 +OpBranch %427 +%427 = OpLabel +%431 = OpLoad %8 %387 +%432 = OpIAdd %8 %431 %54 +OpStore %387 %432 +OpBranch %399 +%401 = OpLabel OpReturn OpFunctionEnd -%379 = OpFunction %2 None %25 -%368 = OpLabel -%372 = OpLoad %4 %370 -%374 = OpLoad %4 %373 -%369 = OpCompositeConstruct %7 %372 %374 -%377 = OpLoad %4 %376 -%375 = OpCompositeConstruct %11 %377 -OpBranch %380 -%380 = OpLabel -%381 = OpCompositeExtract %4 %369 1 -%382 = OpCompositeExtract %4 %375 0 -%383 = OpFMul %4 %381 %382 -OpStore %378 %383 +%444 = OpFunction %2 None %63 +%433 = OpLabel +%437 = OpLoad %4 %435 +%439 = OpLoad %4 %438 +%434 = OpCompositeConstruct %7 %437 %439 +%442 = OpLoad %4 %441 +%440 = OpCompositeConstruct %11 %442 +OpBranch %445 +%445 = OpLabel +%446 = OpCompositeExtract %4 %434 1 +%447 = OpCompositeExtract %4 %440 0 +%448 = OpFMul %4 %446 %447 +OpStore %443 %448 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm b/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm index 45e71af552..f32cd756b1 100644 --- a/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm +++ b/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 33 +; Bound: 29 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %11 "f" %18 +OpEntryPoint GLCompute %11 "f" %17 OpExecutionMode %11 LocalSize 1 1 1 OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 4 -OpDecorate %18 BuiltIn LocalInvocationId +OpDecorate %17 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 1 %4 = OpTypeInt 32 0 @@ -22,31 +22,27 @@ OpDecorate %18 BuiltIn LocalInvocationId %13 = OpConstant %4 2 %14 = OpConstant %4 1 %16 = OpConstantNull %4 -%17 = OpTypeVector %4 3 -%19 = OpTypePointer Input %17 -%18 = OpVariable %19 Input -%21 = OpConstantNull %17 -%22 = OpTypeVector %5 3 -%27 = OpConstant %4 264 -%30 = OpConstant %4 0 +%18 = OpTypePointer Input %4 +%17 = OpVariable %18 Input +%20 = OpConstant %4 0 +%24 = OpConstant %4 264 %11 = OpFunction %2 None %12 %10 = OpLabel OpBranch %15 %15 = OpLabel -%20 = OpLoad %17 %18 -%23 = OpIEqual %22 %20 %21 -%24 = OpAll %5 %23 -OpSelectionMerge %25 None -OpBranchConditional %24 %26 %25 -%26 = OpLabel +%19 = OpLoad %4 %17 +%21 = OpIEqual %5 %19 %20 +OpSelectionMerge %22 None +OpBranchConditional %21 %23 %22 +%23 = OpLabel OpStore %8 %16 +OpBranch %22 +%22 = OpLabel +OpControlBarrier %13 %13 %24 OpBranch %25 %25 = OpLabel -OpControlBarrier %13 %13 %27 -OpBranch %28 -%28 = OpLabel -%31 = OpAtomicCompareExchange %4 %8 %7 %30 %30 %14 %13 -%32 = OpIEqual %5 %31 %13 -%29 = OpCompositeConstruct %6 %31 %32 +%27 = OpAtomicCompareExchange %4 %8 %7 %20 %20 %14 %13 +%28 = OpIEqual %5 %27 %13 +%26 = OpCompositeConstruct %6 %27 %28 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-policy-mix.spvasm b/naga/tests/out/spv/wgsl-policy-mix.spvasm index 8d9209d7b3..23f3313349 100644 --- a/naga/tests/out/spv/wgsl-policy-mix.spvasm +++ b/naga/tests/out/spv/wgsl-policy-mix.spvasm @@ -1,13 +1,13 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 126 +; Bound: 123 OpCapability Shader OpCapability ImageQuery OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %102 "main" %115 +OpEntryPoint GLCompute %102 "main" %114 OpExecutionMode %102 LocalSize 1 1 1 %3 = OpString "policy-mix.wgsl" OpSource Unknown 0 %3 "// Tests that the index, buffer, and texture bounds checks policies are @@ -82,7 +82,7 @@ OpDecorate %26 Block OpMemberDecorate %26 0 Offset 0 OpDecorate %28 DescriptorSet 0 OpDecorate %28 Binding 2 -OpDecorate %115 BuiltIn LocalInvocationId +OpDecorate %114 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %4 = OpTypeFloat 32 %5 = OpTypeVector %4 4 @@ -147,11 +147,9 @@ OpDecorate %115 BuiltIn LocalInvocationId %110 = OpConstant %18 3 %111 = OpConstant %18 4 %113 = OpConstantNull %14 -%114 = OpTypeVector %8 3 -%116 = OpTypePointer Input %114 -%115 = OpVariable %116 Input -%118 = OpConstantNull %114 -%123 = OpConstant %8 264 +%115 = OpTypePointer Input %8 +%114 = OpVariable %115 Input +%120 = OpConstant %8 264 %39 = OpFunction %5 None %40 %36 = OpFunctionParameter %19 %37 = OpFunctionParameter %18 @@ -214,21 +212,20 @@ OpFunctionEnd %106 = OpLoad %13 %28 OpBranch %112 %112 = OpLabel -%117 = OpLoad %114 %115 -%119 = OpIEqual %74 %117 %118 -%120 = OpAll %67 %119 -OpSelectionMerge %121 None -OpBranchConditional %120 %122 %121 -%122 = OpLabel +%116 = OpLoad %8 %114 +%117 = OpIEqual %67 %116 %42 +OpSelectionMerge %118 None +OpBranchConditional %117 %119 %118 +%119 = OpLabel OpStore %30 %113 +OpBranch %118 +%118 = OpLabel +OpControlBarrier %21 %21 %120 OpBranch %121 %121 = OpLabel -OpControlBarrier %21 %21 %123 -OpBranch %124 -%124 = OpLabel OpLine %3 37 19 OpLine %3 37 19 OpLine %3 37 5 -%125 = OpFunctionCall %5 %39 %109 %110 %111 +%122 = OpFunctionCall %5 %39 %109 %110 %111 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm b/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm index c2d0de7a04..e82a9a70ba 100644 --- a/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 39 +; Bound: 38 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -9,7 +9,7 @@ OpEntryPoint GLCompute %14 "test_workgroupUniformLoad" %11 %19 OpExecutionMode %14 LocalSize 4 1 1 OpDecorate %5 ArrayStride 4 OpDecorate %11 BuiltIn WorkgroupId -OpDecorate %19 BuiltIn LocalInvocationId +OpDecorate %19 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -23,43 +23,42 @@ OpDecorate %19 BuiltIn LocalInvocationId %15 = OpTypeFunction %2 %16 = OpConstant %4 10 %18 = OpConstantNull %5 -%19 = OpVariable %12 Input -%21 = OpConstantNull %7 -%23 = OpTypeBool -%22 = OpTypeVector %23 3 -%28 = OpConstant %3 2 -%29 = OpConstant %3 264 -%32 = OpTypePointer Workgroup %4 +%20 = OpTypePointer Input %3 +%19 = OpVariable %20 Input +%22 = OpConstant %3 0 +%24 = OpTypeBool +%27 = OpConstant %3 2 +%28 = OpConstant %3 264 +%31 = OpTypePointer Workgroup %4 %14 = OpFunction %2 None %15 %10 = OpLabel %13 = OpLoad %7 %11 OpBranch %17 %17 = OpLabel -%20 = OpLoad %7 %19 -%24 = OpIEqual %22 %20 %21 -%25 = OpAll %23 %24 -OpSelectionMerge %26 None -OpBranchConditional %25 %27 %26 -%27 = OpLabel -OpStore %8 %18 -OpBranch %26 +%21 = OpLoad %3 %19 +%23 = OpIEqual %24 %21 %22 +OpSelectionMerge %25 None +OpBranchConditional %23 %26 %25 %26 = OpLabel -OpControlBarrier %28 %28 %29 -OpBranch %30 -%30 = OpLabel -%31 = OpCompositeExtract %3 %13 0 -OpControlBarrier %28 %28 %29 -%33 = OpAccessChain %32 %8 %31 -%34 = OpLoad %4 %33 -OpControlBarrier %28 %28 %29 -%35 = OpSGreaterThan %23 %34 %16 -OpSelectionMerge %36 None -OpBranchConditional %35 %37 %38 -%37 = OpLabel -OpControlBarrier %28 %28 %29 +OpStore %8 %18 +OpBranch %25 +%25 = OpLabel +OpControlBarrier %27 %27 %28 +OpBranch %29 +%29 = OpLabel +%30 = OpCompositeExtract %3 %13 0 +OpControlBarrier %27 %27 %28 +%32 = OpAccessChain %31 %8 %30 +%33 = OpLoad %4 %32 +OpControlBarrier %27 %27 %28 +%34 = OpSGreaterThan %24 %33 %16 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %37 +%36 = OpLabel +OpControlBarrier %27 %27 %28 OpReturn -%38 = OpLabel +%37 = OpLabel OpReturn -%36 = OpLabel +%35 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm b/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm index cb214f3e35..801ad15e94 100644 --- a/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 42 +; Bound: 38 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %18 "main" %26 +OpEntryPoint GLCompute %18 "main" %25 OpExecutionMode %18 LocalSize 1 1 1 %3 = OpString "workgroup-var-init.wgsl" OpSource Unknown 0 %3 "struct WStruct { @@ -41,7 +41,7 @@ OpDecorate %14 DescriptorSet 0 OpDecorate %14 Binding 0 OpDecorate %15 Block OpMemberDecorate %15 0 Offset 0 -OpDecorate %26 BuiltIn LocalInvocationId +OpDecorate %25 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %4 = OpTypeInt 32 0 %6 = OpConstant %4 512 @@ -60,36 +60,32 @@ OpDecorate %26 BuiltIn LocalInvocationId %20 = OpTypePointer StorageBuffer %5 %21 = OpConstant %4 0 %24 = OpConstantNull %11 -%25 = OpTypeVector %4 3 -%27 = OpTypePointer Input %25 -%26 = OpVariable %27 Input -%29 = OpConstantNull %25 -%31 = OpTypeBool -%30 = OpTypeVector %31 3 -%36 = OpConstant %4 2 -%37 = OpConstant %4 264 -%39 = OpTypePointer Workgroup %5 +%26 = OpTypePointer Input %4 +%25 = OpVariable %26 Input +%29 = OpTypeBool +%32 = OpConstant %4 2 +%33 = OpConstant %4 264 +%35 = OpTypePointer Workgroup %5 %18 = OpFunction %2 None %19 %17 = OpLabel %22 = OpAccessChain %20 %14 %21 OpBranch %23 %23 = OpLabel -%28 = OpLoad %25 %26 -%32 = OpIEqual %30 %28 %29 -%33 = OpAll %31 %32 -OpSelectionMerge %34 None -OpBranchConditional %33 %35 %34 -%35 = OpLabel +%27 = OpLoad %4 %25 +%28 = OpIEqual %29 %27 %21 +OpSelectionMerge %30 None +OpBranchConditional %28 %31 %30 +%31 = OpLabel OpStore %12 %24 +OpBranch %30 +%30 = OpLabel +OpControlBarrier %32 %32 %33 OpBranch %34 %34 = OpLabel -OpControlBarrier %36 %36 %37 -OpBranch %38 -%38 = OpLabel OpLine %3 14 14 -%40 = OpAccessChain %39 %12 %21 -%41 = OpLoad %5 %40 +%36 = OpAccessChain %35 %12 %21 +%37 = OpLoad %5 %36 OpLine %3 14 5 -OpStore %22 %41 +OpStore %22 %37 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl index c5e853af26..4920dbd865 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl index fe7c341f30..85fb355995 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl index b6eea73d08..0407e21c67 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl index 974027fdbb..93372453ce 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl @@ -31,17 +31,29 @@ var taskPayload: TaskPayload; var workgroupData: f32; var mesh_output: MeshOutput; +fn helper_reader() -> bool { + let _e2 = taskPayload.visible; + return _e2; +} + +fn helper_writer(value: bool) { + taskPayload.visible = value; + return; +} + @task @payload(taskPayload) @workgroup_size(1, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { workgroupData = 1f; taskPayload.colorMask = vec4(1f, 1f, 0f, 1f); - taskPayload.visible = true; + helper_writer(true); + let _e12 = helper_reader(); + taskPayload.visible = _e12; return vec3(1u, 1u, 1u); } @task @payload(taskPayload) @workgroup_size(2, 1, 1) -fn ts_divergent(@builtin(local_invocation_index) thread_id: u32) -> @builtin(mesh_task_size) vec3 { - if (thread_id == 0u) { +fn ts_divergent(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if (thread_id.x == 0u) { taskPayload.colorMask = vec4(1f, 1f, 0f, 1f); taskPayload.visible = true; return vec3(1u, 1u, 1u); @@ -64,8 +76,8 @@ fn ms_main() { let _e67 = taskPayload.colorMask; mesh_output.vertices[2].color = (vec4(1f, 0f, 0f, 1f) * _e67); mesh_output.primitives[0].indices = vec3(0u, 1u, 2u); - let _e88 = taskPayload.visible; - mesh_output.primitives[0].cull = !(_e88); + let _e86 = helper_reader(); + mesh_output.primitives[0].cull = !(_e86); mesh_output.primitives[0].colorMask = vec4(1f, 0f, 1f, 1f); return; } @@ -87,9 +99,9 @@ fn ms_no_ts() { return; } -@mesh(mesh_output) @workgroup_size(1, 1, 1) -fn ms_divergent(@builtin(local_invocation_index) thread_id_1: u32) { - if (thread_id_1 == 0u) { +@mesh(mesh_output) @workgroup_size(2, 1, 1) +fn ms_divergent(@builtin(local_invocation_id) thread_id_1: vec3) { + if (thread_id_1.x == 0u) { mesh_output.vertex_count = 3u; mesh_output.primitive_count = 1u; workgroupData = 2f; diff --git a/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl b/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl index 13f6c225e4..8a8d6de119 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl +++ b/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl @@ -96,9 +96,9 @@ fn ms_no_ts() { @mesh(mesh_output) @workgroup_size(2) -fn ms_divergent(@builtin(local_invocation_index) index: u32) { +fn ms_divergent(@builtin(local_invocation_id) thread_id: vec3) { // Workgroup with 2 threads. They return at different points. - if index == 0 { + if thread_id.x == 0 { mesh_output.vertex_count = 3; mesh_output.primitive_count = 1; workgroupData = 2.0; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 796f660e34..fd605596c4 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -2177,6 +2177,7 @@ impl super::Adapter { drop_callback: Option, enabled_extensions: &[&'static CStr], features: wgt::Features, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, family_index: u32, queue_index: u32, @@ -2437,6 +2438,10 @@ impl super::Adapter { // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None, + task_runtime_limits: Some(spv::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: limits.max_task_mesh_workgroups_per_dimension, + max_mesh_workgroups_total: limits.max_task_mesh_workgroup_total_count, + }), } }; @@ -2549,6 +2554,7 @@ impl super::Adapter { pub unsafe fn open_with_callback<'a>( &self, features: wgt::Features, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, callback: Option>>, ) -> Result, crate::DeviceError> { @@ -2611,6 +2617,7 @@ impl super::Adapter { None, &enabled_extensions, features, + limits, memory_hints, family_info.queue_family_index, 0, @@ -2625,10 +2632,10 @@ impl crate::Adapter for super::Adapter { unsafe fn open( &self, features: wgt::Features, - _limits: &wgt::Limits, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { - unsafe { self.open_with_callback(features, memory_hints, None) } + unsafe { self.open_with_callback(features, limits, memory_hints, None) } } unsafe fn texture_format_capabilities( diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index f804a208fb..d489ceead6 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -653,7 +653,8 @@ impl super::Device { || !runtime_checks.ray_query_initialization_tracking || !binding_map.is_empty() || naga_shader.debug_source.is_some() - || !stage.zero_initialize_workgroup_memory; + || !stage.zero_initialize_workgroup_memory + || !runtime_checks.task_shader_dispatch_tracking; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -686,6 +687,9 @@ impl super::Device { temp_options.zero_initialize_workgroup_memory = naga::back::spv::ZeroInitializeWorkgroupMemoryMode::None; } + if !runtime_checks.task_shader_dispatch_tracking { + temp_options.task_runtime_limits = None; + } &temp_options } else { diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs index 99e173a5ae..97a3ebe11c 100644 --- a/wgpu-types/src/features.rs +++ b/wgpu-types/src/features.rs @@ -1153,6 +1153,10 @@ bitflags_array! { /// /// Naga is only supported on vulkan. On other platforms you will have to use passthrough shaders. /// + /// It is recommended to use [`Device::create_shader_module_trusted`] with [`ShaderRuntimeChecks::unchecked()`] + /// to avoid workgroup memory zero initialization, which can be expensive due to zero initialization being + /// single-threaded currently. + /// /// Some Mesa drivers including LLVMPIPE but not RADV fail to run the naga generated code. /// [This may be our bug and will be investigated.](https://github.com/gfx-rs/wgpu/issues/8727) /// However, due to the nature of the failure, the fact that it is unique, and the random changes @@ -1160,6 +1164,9 @@ bitflags_array! { /// [this Mesa issue.](https://gitlab.freedesktop.org/mesa/mesa/-/issues/14376) /// /// This is a native only feature. + /// + /// [`Device::create_shader_module_trusted`]: https://docs.rs/wgpu/latest/wgpu/struct.Device.html#method.create_shader_module_trusted + /// [`ShaderRuntimeChecks::unchecked()`]: crate::ShaderRuntimeChecks::unchecked const EXPERIMENTAL_MESH_SHADER = 1 << 48; /// ***THIS IS EXPERIMENTAL:*** Features enabled by this may have diff --git a/wgpu-types/src/shader.rs b/wgpu-types/src/shader.rs index f34ec963b0..0bb76edfbc 100644 --- a/wgpu-types/src/shader.rs +++ b/wgpu-types/src/shader.rs @@ -43,6 +43,9 @@ pub struct ShaderRuntimeChecks { /// /// It is the aim that these cases will not cause UB if this is set to true, but currently this will still happen on DX12 and Metal. pub ray_query_initialization_tracking: bool, + + /// If false, task shaders will not validate that the mesh shader grid they dispatch is within legal limits. + pub task_shader_dispatch_tracking: bool, } impl ShaderRuntimeChecks { @@ -76,6 +79,7 @@ impl ShaderRuntimeChecks { bounds_checks: all_checks, force_loop_bounding: all_checks, ray_query_initialization_tracking: all_checks, + task_shader_dispatch_tracking: all_checks, } } }