diff --git a/CHANGELOG.md b/CHANGELOG.md index 45a047d7d69..7d3b449168c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,12 @@ Bottom level categories: - Added support for obtaining `AdapterInfo` from `Device`. By @sagudev in [#8807](https://github.com/gfx-rs/wgpu/pull/8807). - Added `Limits::or_worse_values_from`. By @atlv24 in [#8870](https://github.com/gfx-rs/wgpu/pull/8870). +### General + +#### DX12 + +- Full support for mesh shaders in HLSL/DX12. By @inner-daemons in #8752. + ### Bug Fixes #### General @@ -60,6 +66,9 @@ Bottom level categories: - The validator checks that override-sized arrays have a positive size, if overrides have been resolved. By @andyleiserson in [#8822](https://github.com/gfx-rs/wgpu/pull/8822). - Fix some cases where f16 constants were not working. By @andyleiserson in [#8816](https://github.com/gfx-rs/wgpu/pull/8816). +#### Vulkan +- Fixed a variety of mesh shader SPIR-V writer issues from the original implementation. By @inner-daemons in [#8756](https://github.com/gfx-rs/wgpu/pull/8756) + #### GLES - `DisplayHandle` should now be passed to `InstanceDescriptor` for correct EGL initialization on Wayland. By @MarijnS95 in [#8012](https://github.com/gfx-rs/wgpu/pull/8012) diff --git a/docs/api-specs/mesh_shading.md b/docs/api-specs/mesh_shading.md index c2b0a5202a0..7cc6fea0a2a 100644 --- a/docs/api-specs/mesh_shading.md +++ b/docs/api-specs/mesh_shading.md @@ -130,13 +130,11 @@ A task shader entry point must have a `@workgroup_size` attribute, meeting the s A task shader entry point must also have a `@payload(G)` property, where `G` is the name of a global variable in the `task_payload` address space. Each task shader workgroup has its own instance of this variable, visible to all invocations in the workgroup. Whatever value the workgroup collectively stores in that global variable becomes the **task payload**, and is provided to all invocations in the mesh shader grid dispatched for the workgroup. A task payload variable must be at least 4 bytes in size. -A task shader entry point must return a `vec3` value. The return value of each workgroup's first invocation (that is, the one whose `local_invocation_index` is `0`) is taken as the size of a **mesh shader grid** to dispatch, measured in workgroups. (If the task shader entry point returns `vec3(0, 0, 0)`, then no mesh shaders are dispatched.) Mesh shader grids are described in the next section. +A task shader entry point must return a `vec3` value decorated with `@builtin(mesh_task_size)`. The return value of each workgroup's first invocation (that is, the one whose `local_invocation_index` is `0`) is taken as the size of a **mesh shader grid** to dispatch, measured in workgroups. (If the task shader entry point returns `vec3(0, 0, 0)`, then no mesh shaders are dispatched.) Mesh shader grids are described in the next section. Each task shader workgroup dispatches an independent mesh shader grid: in mesh shader invocations, `@builtin` values like `workgroup_id` and `global_invocation_id` describe the position of the workgroup and invocation within that grid; and `@builtin(num_workgroups)` matches the task shader workgroup's return value. Mesh shaders dispatched for other task shader workgroups are not included in the count. If it is necessary for a mesh shader to know which task shader workgroup dispatched it, the task shader can include its own workgroup id in the task payload. -Task shaders must return a value of type `vec3` decorated with `@builtin(mesh_task_size)`. - Task shaders can use compute and subgroup builtin inputs, in addition to `view_index` and `draw_id`. ### Mesh shader diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index 418e510e1b4..5d137709c5f 100644 --- a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -260,11 +260,13 @@ impl ExampleContext { async fn init_async(surface: &mut SurfaceWrapper, window: Arc) -> Self { log::info!("Initializing wgpu..."); - let instance_descriptor = wgpu::InstanceDescriptor::from_env_or_default() - .with_display_handle(Box::new( - // TODO: Use event_loop.owned_display_handle() with winit 0.30 - window.clone(), - )); + let mut instance_descriptor = wgpu::InstanceDescriptor::default(); + // Use static DXC by default so we can utilize the latest features + instance_descriptor.backend_options.dx12.shader_compiler = wgpu::Dx12Compiler::StaticDxc; + let instance_descriptor = instance_descriptor.with_env().with_display_handle(Box::new( + // TODO: Use event_loop.owned_display_handle() with winit 0.30 + window.clone(), + )); let instance = wgpu::Instance::new(instance_descriptor); surface.pre_adapter(&instance, window); let adapter = get_adapter_with_capabilities_or_from_env( diff --git a/examples/features/src/mesh_shader/mod.rs b/examples/features/src/mesh_shader/mod.rs index 20f069e9a84..738a19d57f9 100644 --- a/examples/features/src/mesh_shader/mod.rs +++ b/examples/features/src/mesh_shader/mod.rs @@ -1,40 +1,14 @@ // Same as in mesh shader tests fn compile_wgsl(device: &wgpu::Device) -> wgpu::ShaderModule { - device.create_shader_module(wgpu::ShaderModuleDescriptor { - label: None, - source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), - }) -} -fn compile_hlsl(device: &wgpu::Device, entry: &str, stage_str: &str) -> wgpu::ShaderModule { - let out_path = format!( - "{}/src/mesh_shader/shader.{stage_str}.cso", - env!("CARGO_MANIFEST_DIR") - ); - let cmd = std::process::Command::new("dxc") - .args([ - "-T", - &format!("{stage_str}_6_5"), - "-E", - entry, - &format!("{}/src/mesh_shader/shader.hlsl", env!("CARGO_MANIFEST_DIR")), - "-Fo", - &out_path, - ]) - .output() - .unwrap(); - if !cmd.status.success() { - panic!("DXC failed:\n{}", String::from_utf8(cmd.stderr).unwrap()); - } - let file = std::fs::read(&out_path).unwrap(); - std::fs::remove_file(out_path).unwrap(); + // Workgroup memory zero initialization can be expensive for mesh shaders unsafe { - device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough { - entry_point: entry.to_owned(), - label: None, - num_workgroups: (1, 1, 1), - dxil: Some(std::borrow::Cow::Owned(file)), - ..Default::default() - }) + device.create_shader_module_trusted( + wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), + }, + wgpu::ShaderRuntimeChecks::unchecked(), + ) } } @@ -61,7 +35,7 @@ impl crate::framework::Example for Example { _queue: &wgpu::Queue, ) -> Self { let (ts, ms, fs, ts_name, ms_name, fs_name) = match adapter.get_info().backend { - wgpu::Backend::Vulkan => ( + wgpu::Backend::Vulkan | wgpu::Backend::Dx12 => ( compile_wgsl(device), compile_wgsl(device), compile_wgsl(device), @@ -69,14 +43,6 @@ impl crate::framework::Example for Example { "ms_main", "fs_main", ), - wgpu::Backend::Dx12 => ( - compile_hlsl(device, "Task", "as"), - compile_hlsl(device, "Mesh", "ms"), - compile_hlsl(device, "Frag", "ps"), - "main", - "main", - "main", - ), wgpu::Backend::Metal => ( compile_msl(device, "taskShader"), compile_msl(device, "meshShader"), diff --git a/examples/features/src/mesh_shader/shader.hlsl b/examples/features/src/mesh_shader/shader.hlsl deleted file mode 100644 index e70961f2d71..00000000000 --- a/examples/features/src/mesh_shader/shader.hlsl +++ /dev/null @@ -1,53 +0,0 @@ -struct OutVertex { - float4 Position : SV_POSITION; - float4 Color: COLOR; -}; -struct OutPrimitive { - float4 ColorMask : COLOR_MASK : PRIMITIVE; - bool CullPrimitive: SV_CullPrimitive; -}; -struct InVertex { - float4 Color: COLOR; -}; -struct InPrimitive { - float4 ColorMask : COLOR_MASK : PRIMITIVE; -}; -struct PayloadData { - float4 ColorMask; - bool Visible; -}; - - -static const float4 positions[3] = {float4(0., 1.0, 0., 1.0), float4(-1.0, -1.0, 0., 1.0), float4(1.0, -1.0, 0., 1.0)}; -static const float4 colors[3] = {float4(0., 1., 0., 1.), float4(0., 0., 1., 1.), float4(1., 0., 0., 1.)}; - -groupshared PayloadData outPayload; - -[numthreads(1, 1, 1)] -void Task() { - outPayload.ColorMask = float4(1.0, 1.0, 0.0, 1.0); - outPayload.Visible = true; - DispatchMesh(3, 1, 1, outPayload); -} - -[outputtopology("triangle")] -[numthreads(1, 1, 1)] -void Mesh(out indices uint3 triangles[1], out vertices OutVertex vertices[3], out primitives OutPrimitive primitives[1], in payload PayloadData payload) { - SetMeshOutputCounts(3, 1); - - vertices[0].Position = positions[0]; - vertices[1].Position = positions[1]; - vertices[2].Position = positions[2]; - - vertices[0].Color = colors[0] * payload.ColorMask; - vertices[1].Color = colors[1] * payload.ColorMask; - vertices[2].Color = colors[2] * payload.ColorMask; - - triangles[0] = uint3(0, 1, 2); - primitives[0].ColorMask = float4(1.0, 0.0, 0.0, 1.0); - primitives[0].CullPrimitive = !payload.Visible; -} - -float4 Frag(InVertex vertex, InPrimitive primitive) : SV_Target { - return vertex.Color * primitive.ColorMask; -} diff --git a/examples/features/src/mesh_shader/shader.wgsl b/examples/features/src/mesh_shader/shader.wgsl index e0d03ea13d7..78732a68411 100644 --- a/examples/features/src/mesh_shader/shader.wgsl +++ b/examples/features/src/mesh_shader/shader.wgsl @@ -33,12 +33,15 @@ var workgroupData: f32; @task @payload(taskPayload) -@workgroup_size(1) -fn ts_main() -> @builtin(mesh_task_size) vec3 { - workgroupData = 1.0; - taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; - return vec3(1, 1, 1); +@workgroup_size(64) +fn ts_main(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { + workgroupData = 1.0; + taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); + taskPayload.visible = true; + return vec3(1, 1, 1); + } + return vec3(0, 0, 0); } struct MeshOutput { @@ -52,24 +55,27 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) -fn ms_main() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; +@workgroup_size(64) +fn ms_main(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { + mesh_output.vertex_count = 3; + mesh_output.primitive_count = 1; + workgroupData = 2.0; - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; + mesh_output.vertices[0].position = positions[0]; + mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; + mesh_output.vertices[1].position = positions[1]; + mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; + mesh_output.vertices[2].position = positions[2]; + mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + mesh_output.primitives[0].indices = vec3(0, 1, 2); + mesh_output.primitives[0].cull = !taskPayload.visible; + mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + return; + } } @fragment diff --git a/naga-test/src/lib.rs b/naga-test/src/lib.rs index 51db424ff75..18da44556ef 100644 --- a/naga-test/src/lib.rs +++ b/naga-test/src/lib.rs @@ -164,6 +164,10 @@ impl SpirvOutParameters { ray_query_initialization_tracking: true, debug_info, use_storage_input_output_16: self.use_storage_input_output_16, + task_runtime_limits: Some(naga::back::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: 256, + max_mesh_workgroups_total: 1024, + }), } } } diff --git a/naga/hlsl-snapshots/src/lib.rs b/naga/hlsl-snapshots/src/lib.rs index ee455432731..6340fd11c2d 100644 --- a/naga/hlsl-snapshots/src/lib.rs +++ b/naga/hlsl-snapshots/src/lib.rs @@ -50,6 +50,8 @@ pub struct Config { pub vertex: Vec, pub fragment: Vec, pub compute: Vec, + pub task: Vec, + pub mesh: Vec, } impl Config { @@ -59,6 +61,8 @@ impl Config { vertex: Default::default(), fragment: Default::default(), compute: Default::default(), + task: Default::default(), + mesh: Default::default(), } } @@ -85,8 +89,14 @@ impl Config { vertex, fragment, compute, + task, + mesh, } = self; - vertex.is_empty() && fragment.is_empty() && compute.is_empty() + vertex.is_empty() + && fragment.is_empty() + && compute.is_empty() + && task.is_empty() + && mesh.is_empty() } } diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs index 6cd3679e817..7a659355355 100644 --- a/naga/src/back/hlsl/conv.rs +++ b/naga/src/back/hlsl/conv.rs @@ -149,8 +149,8 @@ impl crate::StorageFormat { } impl crate::BuiltIn { - pub(super) fn to_hlsl_str(self) -> Result<&'static str, Error> { - Ok(match self { + pub(super) fn to_hlsl_str(self) -> Result, Error> { + Ok(Some(match self { Self::Position { .. } => "SV_Position", // vertex Self::ClipDistance => "SV_ClipDistance", @@ -186,13 +186,15 @@ impl crate::BuiltIn { return Err(Error::Custom(format!("Unsupported builtin {self:?}"))) } Self::CullPrimitive => "SV_CullPrimitive", - Self::PointIndex | Self::LineIndices | Self::TriangleIndices => unimplemented!(), Self::MeshTaskSize | Self::VertexCount | Self::PrimitiveCount | Self::Vertices - | Self::Primitives => unreachable!(), - }) + | Self::Primitives + | Self::PointIndex + | Self::LineIndices + | Self::TriangleIndices => return Ok(None), + })) } } diff --git a/naga/src/back/hlsl/mesh_shader.rs b/naga/src/back/hlsl/mesh_shader.rs new file mode 100644 index 00000000000..5dacfa921d0 --- /dev/null +++ b/naga/src/back/hlsl/mesh_shader.rs @@ -0,0 +1,398 @@ +use alloc::{ + format, + string::{String, ToString}, + vec::Vec, +}; + +use crate::{ + back::{ + self, + hlsl::{ + writer::{EntryPointBinding, EpStructMember, Io}, + BackendResult, Error, + }, + }, + proc::NameKey, + Handle, Module, ShaderStage, TypeInner, +}; + +impl super::Writer<'_, W> { + /// Mesh and task entry points must all return at the same `return` statement, + /// so we have a nested function that can return wherever. This writes the caller, + /// or the actual entry point. + #[expect(clippy::too_many_arguments)] + pub(super) fn write_nested_function_outer( + &mut self, + module: &Module, + func_ctx: &back::FunctionCtx, + header: &str, + name: &str, + need_workgroup_variables_initialization: bool, + nested_name: &str, + entry_point: &crate::EntryPoint, + ) -> BackendResult { + let mut any_args_written = false; + let mut separator = || { + if any_args_written { + ", " + } else { + any_args_written = true; + "" + } + }; + + let back::FunctionType::EntryPoint(ep_index) = func_ctx.ty else { + unreachable!(); + }; + let stage = module.entry_points[ep_index as usize].stage; + write!(self.out, "{header}")?; + write!(self.out, "void {name}(")?; + let mut arg_names = Vec::new(); + if let Some(ref ep_input) = self.entry_point_io.get(&(ep_index as usize)).unwrap().input { + write!(self.out, "{} {}", ep_input.ty_name, ep_input.arg_name)?; + arg_names.push(ep_input.arg_name.clone()); + } else { + for (index, arg) in entry_point.function.arguments.iter().enumerate() { + write!(self.out, "{}", separator())?; + self.write_type(module, arg.ty)?; + + let argument_name = + &self.names[&NameKey::EntryPointArgument(ep_index, index as u32)]; + arg_names.push(argument_name.clone()); + + write!(self.out, " {argument_name}")?; + if let TypeInner::Array { base, size, .. } = module.types[arg.ty].inner { + self.write_array_size(module, base, size)?; + } + + self.write_semantic(&arg.binding, Some((stage, Io::Input)))?; + } + } + if need_workgroup_variables_initialization || stage == ShaderStage::Mesh { + write!( + self.out, + "{}uint __local_invocation_index : SV_GroupIndex", + separator() + )?; + } + if let Some(ref mesh_info) = entry_point.mesh_info { + // Mesh shader wrapper + let mesh_interface = self.entry_point_io.get(&(ep_index as usize)).unwrap(); + let vert_info = mesh_interface.mesh_vertices.as_ref().unwrap(); + let prim_info = mesh_interface.mesh_primitives.as_ref().unwrap(); + let indices_info = mesh_interface.mesh_indices.as_ref().unwrap(); + write!( + self.out, + "{}out indices {} {}[{}]", + separator(), + indices_info.ty_name, + indices_info.arg_name, + mesh_info.max_primitives + )?; + write!( + self.out, + ", out vertices {} {}[{}]", + vert_info.ty_name, vert_info.arg_name, mesh_info.max_vertices + )?; + write!( + self.out, + ", out primitives {} {}[{}]", + prim_info.ty_name, prim_info.arg_name, mesh_info.max_primitives + )?; + if let Some(task_payload) = entry_point.task_payload { + // Set task payload variable + write!(self.out, ", in payload ")?; + let var = &module.global_variables[task_payload]; + self.write_type(module, var.ty)?; + + let name = &self.names[&NameKey::GlobalVariable(task_payload)]; + write!(self.out, " {name}")?; + arg_names.push(name.clone()); + if let TypeInner::Array { base, size, .. } = module.types[var.ty].inner { + self.write_array_size(module, base, size)?; + } + } + writeln!(self.out, ") {{")?; + if need_workgroup_variables_initialization { + writeln!( + self.out, + "{}if (all(__local_invocation_index == 0)) {{", + back::INDENT + )?; + self.write_workgroup_variables_initialization( + func_ctx, + module, + module.entry_points[ep_index as usize].stage, + )?; + writeln!(self.out, "{}}}", back::INDENT)?; + self.write_control_barrier(crate::Barrier::WORK_GROUP, back::Level(1))?; + } + write!(self.out, "{}{nested_name}(", back::INDENT)?; + for (i, arg_name) in arg_names.iter().enumerate() { + if i != 0 { + write!(self.out, ", ")?; + } + write!(self.out, "{arg_name}")?; + } + writeln!(self.out, ");")?; + writeln!( + self.out, + "{}GroupMemoryBarrierWithGroupSync();", + back::INDENT + )?; + + let back::FunctionType::EntryPoint(ep_idx) = func_ctx.ty else { + unreachable!() + }; + let ep = &module.entry_points[ep_idx as usize]; + let mesh_info = ep.mesh_info.as_ref().unwrap(); + let io = self.entry_point_io.get(&(ep_idx as usize)).unwrap(); + + let var_name = &self.names[&NameKey::GlobalVariable(mesh_info.output_variable)]; + let var_type = module.global_variables[mesh_info.output_variable].ty; + let wg_size: u32 = ep.workgroup_size.iter().product(); + + let get_var_member_name = |bi, var_type| { + let TypeInner::Struct { ref members, .. } = module.types[var_type].inner else { + unreachable!() + }; + let idx = members + .iter() + .position(|f| f.binding == Some(crate::Binding::BuiltIn(bi))) + .unwrap(); + self.names[&NameKey::StructMember(var_type, idx as u32)].clone() + }; + + let vert_count = format!( + "{var_name}.{}", + get_var_member_name(crate::BuiltIn::VertexCount, var_type), + ); + let prim_count = format!( + "{var_name}.{}", + get_var_member_name(crate::BuiltIn::PrimitiveCount, var_type), + ); + + let level = back::Level(1); + + writeln!( + self.out, + "{level}SetMeshOutputCounts({vert_count}, {prim_count});" + )?; + + // We need separate loops for vertices and primitives writing + struct OutputArray<'a> { + array_bi: crate::BuiltIn, + count: String, + io_interface: &'a EntryPointBinding, + is_primitive: bool, + index_name: &'static str, + ty: Handle, + } + let output_arrays = [ + OutputArray { + array_bi: crate::BuiltIn::Vertices, + count: vert_count, + io_interface: io.mesh_vertices.as_ref().unwrap(), + is_primitive: false, + index_name: "vertIndex", + ty: mesh_info.vertex_output_type, + }, + OutputArray { + array_bi: crate::BuiltIn::Primitives, + count: prim_count, + io_interface: io.mesh_primitives.as_ref().unwrap(), + is_primitive: true, + index_name: "primIndex", + ty: mesh_info.primitive_output_type, + }, + ]; + + for output in output_arrays { + let OutputArray { + array_bi, + count, + io_interface, + is_primitive, + index_name, + ty, + } = output; + let out_var_name = &io_interface.arg_name; + let index_name = self.namer.call(index_name); + let array_name = get_var_member_name(array_bi, var_type); + let item_name = format!("{var_name}.{array_name}[{index_name}]"); + writeln!( + self.out, + "{level}for (int {index_name} = __local_invocation_index; {index_name} < {count}; {index_name} += {}) {{", + wg_size + )?; + + // Loop body, uses more indentation + { + let level = level.next(); + for member in &io_interface.members { + let out_member_name = &member.name; + let in_member_name = &self.names[&NameKey::StructMember(ty, member.index)]; + writeln!(self.out, "{level}{out_var_name}[{index_name}].{out_member_name} = {item_name}.{in_member_name};",)?; + } + if is_primitive { + let indices_member_name = get_var_member_name( + mesh_info.topology.to_builtin(), + mesh_info.primitive_output_type, + ); + let indices_var_name = &io.mesh_indices.as_ref().unwrap().arg_name; + writeln!( + self.out, + "{level}{indices_var_name}[{index_name}] = {item_name}.{indices_member_name};", + )?; + } + } + + writeln!(self.out, "{level}}}")?; + } + writeln!(self.out, "}}")?; + } else { + // Task shader wrapper + writeln!(self.out, ") {{")?; + if need_workgroup_variables_initialization { + writeln!( + self.out, + "{}if (all(__local_invocation_index == 0)) {{", + back::INDENT + )?; + self.write_workgroup_variables_initialization( + func_ctx, + module, + module.entry_points[ep_index as usize].stage, + )?; + writeln!(self.out, "{}}}", back::INDENT)?; + self.write_control_barrier(crate::Barrier::WORK_GROUP, back::Level(1))?; + } + let grid_size = self.namer.call("gridSize"); + write!( + self.out, + "{}uint3 {grid_size} = {nested_name}(", + back::INDENT + )?; + for (i, arg_name) in arg_names.iter().enumerate() { + if i != 0 { + write!(self.out, ", ")?; + } + write!(self.out, "{arg_name}")?; + } + writeln!(self.out, ");")?; + writeln!( + self.out, + "{}GroupMemoryBarrierWithGroupSync();", + back::INDENT + )?; + if let Some(limits) = self.options.task_runtime_limits { + let level = back::Level(2); + writeln!(self.out, "{}if (", back::INDENT)?; + + let max_per_dim = limits.max_mesh_workgroups_per_dim.min(2 << 21); + let max_total = limits.max_mesh_workgroups_total; + for i in 0..3 { + writeln!( + self.out, + "{level}{grid_size}.{} > {max_per_dim} ||", + back::COMPONENTS[i], + )?; + } + writeln!( + self.out, + "{level}((uint64_t){grid_size}.x) * ((uint64_t){grid_size}.y) > 0xffffffffull ||" + )?; + writeln!( + self.out, + "{level}((uint64_t){grid_size}.x) * ((uint64_t){grid_size}.y) * ((uint64_t){grid_size}.z) > {max_total}", + )?; + + writeln!(self.out, "{}) {{", back::INDENT)?; + writeln!(self.out, "{level}{grid_size} = uint3(0, 0, 0);")?; + writeln!(self.out, "{}}}", back::INDENT)?; + } + writeln!( + self.out, + "{}DispatchMesh({grid_size}.x, {grid_size}.y, {grid_size}.z, {});", + back::INDENT, + self.names[&NameKey::GlobalVariable(entry_point.task_payload.unwrap())] + )?; + writeln!(self.out, "}}")?; + } + + Ok(()) + } + + pub(super) fn write_ep_mesh_output_struct( + &mut self, + module: &Module, + entry_point_name: &str, + is_primitive: bool, + mesh_info: &crate::MeshStageInfo, + ) -> Result { + let (in_type, io, var_prefix, arg_name) = if is_primitive { + ( + mesh_info.primitive_output_type, + Io::MeshPrimitives, + "Primitive", + "primitives", + ) + } else { + ( + mesh_info.vertex_output_type, + Io::MeshVertices, + "Vertex", + "vertices", + ) + }; + let struct_name = format!("Mesh{var_prefix}Output_{entry_point_name}",); + + let members = match module.types[in_type].inner { + TypeInner::Struct { ref members, .. } => members, + _ => unreachable!(), + }; + let mut out_members = Vec::new(); + for (index, member) in members.iter().enumerate() { + if matches!( + member.binding, + Some(crate::Binding::BuiltIn( + crate::BuiltIn::PointIndex + | crate::BuiltIn::LineIndices + | crate::BuiltIn::TriangleIndices + )) + ) { + continue; + } + let member_name = self.namer.call_or(&member.name, "member"); + out_members.push(EpStructMember { + name: member_name, + ty: member.ty, + binding: member.binding.clone(), + index: index as u32, + }) + } + self.write_interface_struct( + module, + (ShaderStage::Mesh, io), + struct_name, + Some(arg_name), + out_members, + ) + } + + pub(super) fn write_ep_mesh_output_indices( + &mut self, + topology: crate::MeshOutputTopology, + ) -> Result { + let (indices_name, indices_type) = match topology { + crate::MeshOutputTopology::Points => unreachable!(), + crate::MeshOutputTopology::Lines => (self.namer.call("lineIndices"), "uint2"), + crate::MeshOutputTopology::Triangles => (self.namer.call("triangleIndices"), "uint3"), + }; + Ok(EntryPointBinding { + ty_name: indices_type.to_string(), + arg_name: indices_name, + members: Vec::new(), + }) + } +} diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 899b5dc9479..5420762b02a 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -146,6 +146,7 @@ it works for our purposes. mod conv; mod help; mod keywords; +mod mesh_shader; mod ray; mod storage; mod writer; @@ -155,7 +156,10 @@ use core::fmt::Error as FmtError; use thiserror::Error; -use crate::{back, ir, proc}; +use crate::{ + back::{self, TaskRuntimeLimits}, + ir, proc, Handle, +}; /// Direct3D 12 binding information for a global variable. /// @@ -539,6 +543,8 @@ pub struct Options { /// If set, loops will have code injected into them, forcing the compiler /// to think the number of iterations is bounded. pub force_loop_bounding: bool, + + pub task_runtime_limits: Option, } impl Default for Options { @@ -556,6 +562,7 @@ impl Default for Options { zero_initialize_workgroup_memory: true, restrict_indexing: true, force_loop_bounding: true, + task_runtime_limits: None, } } } @@ -751,4 +758,7 @@ pub struct Writer<'a, W> { /// [`AccessIndex`]: crate::Expression::AccessIndex temp_access_chain: Vec, need_bake_expressions: back::NeedBakeExpressions, + + function_task_payload_var: + crate::FastHashMap, Handle>, } diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 95feb4d9d98..a4a6ab298bc 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -3,7 +3,10 @@ use alloc::{ string::{String, ToString}, vec::Vec, }; -use core::{fmt, mem}; +use core::{ + fmt::{self, Write as _}, + mem, +}; use super::{ help, @@ -52,25 +55,25 @@ enum Index { Static(u32), } -struct EpStructMember { - name: String, - ty: Handle, +pub(super) struct EpStructMember { + pub(super) name: String, + pub(super) ty: Handle, // technically, this should always be `Some` // (we `debug_assert!` this in `write_interface_struct`) - binding: Option, - index: u32, + pub(super) binding: Option, + pub(super) index: u32, } /// Structure contains information required for generating /// wrapped structure of all entry points arguments -struct EntryPointBinding { +pub(super) struct EntryPointBinding { /// Name of the fake EP argument that contains the struct /// with all the flattened input data. - arg_name: String, + pub(super) arg_name: String, /// Generated structure name - ty_name: String, + pub(super) ty_name: String, /// Members of generated structure - members: Vec, + pub(super) members: Vec, } pub(super) struct EntryPointInterface { @@ -78,11 +81,14 @@ pub(super) struct EntryPointInterface { /// struct with members sorted by binding. /// The `EntryPointBinding::members` array is sorted by index, /// so that we can walk it in `write_ep_arguments_initialization`. - input: Option, + pub(crate) input: Option, /// If `Some`, the output of an entry point is flattened. /// The `EntryPointBinding::members` array is sorted by binding, /// So that we can walk it in `Statement::Return` handler. - output: Option, + pub(crate) output: Option, + pub(crate) mesh_vertices: Option, + pub(crate) mesh_primitives: Option, + pub(crate) mesh_indices: Option, } #[derive(Clone, Eq, PartialEq, PartialOrd, Ord)] @@ -103,9 +109,11 @@ impl InterfaceKey { } #[derive(Copy, Clone, PartialEq)] -enum Io { +pub(super) enum Io { Input, Output, + MeshVertices, + MeshPrimitives, } const fn is_subgroup_builtin_binding(binding: &Option) -> bool { @@ -147,6 +155,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { continue_ctx: back::continue_forward::ContinueCtx::default(), temp_access_chain: Vec::new(), need_bake_expressions: Default::default(), + function_task_payload_var: Default::default(), } } @@ -166,6 +175,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.written_candidate_intersection = false; self.continue_ctx.clear(); self.need_bake_expressions.clear(); + self.function_task_payload_var.clear(); } /// Generates statements to be inserted immediately before and at the very @@ -297,6 +307,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ) -> Result { self.reset(module); + if module.uses_mesh_shaders() && self.options.shader_model < ShaderModel::V6_5 { + return Err(Error::ShaderModelTooLow( + "mesh shaders".to_string(), + ShaderModel::V6_5, + )); + } + // Write special constants, if needed if let Some(ref bt) = self.options.special_constants_binding { writeln!(self.out, "struct {SPECIAL_CBUF_TYPE} {{")?; @@ -410,13 +427,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for index in ep_range.clone() { let ep = &module.entry_points[index]; let ep_name = self.names[&NameKey::EntryPoint(index as u16)].clone(); - let ep_io = self.write_ep_interface( - module, - &ep.function, - ep.stage, - &ep_name, - fragment_entry_point, - )?; + let ep_io = self.write_ep_interface(module, ep, &ep_name, fragment_entry_point)?; self.entry_point_io.insert(index, ep_io); } @@ -461,7 +472,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_wrapped_functions(module, &ctx)?; - self.write_function(module, name.as_str(), function, &ctx, info)?; + self.write_function(module, name.as_str(), function, &ctx, info, String::new())?; writeln!(self.out)?; } @@ -507,18 +518,27 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_wrapped_functions(module, &ctx)?; + let mut header = String::new(); if ep.stage.compute_like() { // HLSL is calling workgroup size "num threads" let num_threads = ep.workgroup_size; writeln!( - self.out, + header, "[numthreads({}, {}, {})]", num_threads[0], num_threads[1], num_threads[2] )?; } + if let Some(ref info) = ep.mesh_info { + let topology_str = match info.topology { + crate::MeshOutputTopology::Points => unreachable!(), + crate::MeshOutputTopology::Lines => "line", + crate::MeshOutputTopology::Triangles => "triangle", + }; + writeln!(header, "[outputtopology(\"{topology_str}\")]")?; + } let name = self.names[&NameKey::EntryPoint(index as u16)].clone(); - self.write_function(module, &name, &ep.function, &ctx, info)?; + self.write_function(module, &name, &ep.function, &ctx, info, header)?; if index < module.entry_points.len() - 1 { writeln!(self.out)?; @@ -562,12 +582,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { //TODO: we could force fragment outputs to always go through `entry_point_io.output` path // if they are struct, so that the `stage` argument here could be omitted. - fn write_semantic( + pub(super) fn write_semantic( &mut self, binding: &Option, stage: Option<(ShaderStage, Io)>, ) -> BackendResult { - match *binding { + let per_primitive = match *binding { Some(crate::Binding::BuiltIn(builtin)) if !is_subgroup_builtin_binding(binding) => { if builtin == crate::BuiltIn::ViewIndex && self.options.shader_model < ShaderModel::V6_1 @@ -577,34 +597,49 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ShaderModel::V6_1, )); } - let builtin_str = builtin.to_hlsl_str()?; - write!(self.out, " : {builtin_str}")?; + if let Some(builtin_str) = builtin.to_hlsl_str()? { + write!(self.out, " : {builtin_str}")?; + } + false } Some(crate::Binding::Location { - blend_src: Some(1), .. + blend_src: Some(1), + per_primitive, + .. }) => { write!(self.out, " : SV_Target1")?; + per_primitive } - Some(crate::Binding::Location { location, .. }) => { + Some(crate::Binding::Location { + location, + per_primitive, + .. + }) => { if stage == Some((ShaderStage::Fragment, Io::Output)) { write!(self.out, " : SV_Target{location}")?; } else { write!(self.out, " : {LOCATION_SEMANTIC}{location}")?; } + per_primitive } - _ => {} + _ => false, + }; + if per_primitive { + write!(self.out, " : primitive")?; } Ok(()) } - fn write_interface_struct( + pub(super) fn write_interface_struct( &mut self, module: &Module, shader_stage: (ShaderStage, Io), struct_name: String, + var_name: Option<&str>, mut members: Vec, ) -> Result { + let struct_name = self.namer.call(&struct_name); // Sort the members so that first come the user-defined varyings // in ascending locations, and then built-ins. This allows VS and FS // interfaces to match with regards to order. @@ -650,13 +685,15 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // bring back the original order members.sort_by_key(|m| m.index); } - Io::Output => { + Io::Output | Io::MeshVertices | Io::MeshPrimitives => { // keep it sorted by binding } } Ok(EntryPointBinding { - arg_name: self.namer.call(struct_name.to_lowercase().as_str()), + arg_name: self + .namer + .call(var_name.unwrap_or(struct_name.to_lowercase().as_str())), ty_name: struct_name, members, }) @@ -706,7 +743,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } - self.write_interface_struct(module, (stage, Io::Input), struct_name, fake_members) + self.write_interface_struct(module, (stage, Io::Input), struct_name, None, fake_members) } /// Flatten all entry point results into a single struct. @@ -782,7 +819,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }); } - self.write_interface_struct(module, (stage, Io::Output), struct_name, fake_members) + self.write_interface_struct(module, (stage, Io::Output), struct_name, None, fake_members) } /// Writes special interface structures for an entry point. The special structures have @@ -791,11 +828,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { fn write_ep_interface( &mut self, module: &Module, - func: &crate::Function, - stage: ShaderStage, + ep: &crate::EntryPoint, ep_name: &str, frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { + let func = &ep.function; + let stage = ep.stage; Ok(EntryPointInterface { input: if !func.arguments.is_empty() && (stage == ShaderStage::Fragment @@ -814,6 +852,21 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } _ => None, }, + mesh_vertices: if let Some(ref info) = ep.mesh_info { + Some(self.write_ep_mesh_output_struct(module, ep_name, false, info)?) + } else { + None + }, + mesh_primitives: if let Some(ref info) = ep.mesh_info { + Some(self.write_ep_mesh_output_struct(module, ep_name, true, info)?) + } else { + None + }, + mesh_indices: if let Some(ref info) = ep.mesh_info { + Some(self.write_ep_mesh_output_indices(info.topology)?) + } else { + None + }, }) } @@ -970,12 +1023,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_type(module, global.ty)?; "" } - crate::AddressSpace::WorkGroup => { + crate::AddressSpace::WorkGroup | crate::AddressSpace::TaskPayload => { write!(self.out, "groupshared ")?; self.write_type(module, global.ty)?; "" } - crate::AddressSpace::TaskPayload => unimplemented!(), crate::AddressSpace::Uniform => { // constant buffer declarations are expected to be inlined, e.g. // `cbuffer foo: register(b0) { field1: type1; }` @@ -1520,12 +1572,31 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { func: &crate::Function, func_ctx: &back::FunctionCtx<'_>, info: &valid::FunctionInfo, + header: String, ) -> BackendResult { // Function Declaration Syntax - https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-function-syntax self.update_expressions_to_bake(module, func, info); + let ep = match func_ctx.ty { + back::FunctionType::EntryPoint(idx) => Some(&module.entry_points[idx as usize]), + back::FunctionType::Function(_) => None, + }; - if let Some(ref result) = func.result { + let nested = matches!( + ep, + Some(crate::EntryPoint { + stage: ShaderStage::Task | ShaderStage::Mesh, + .. + }) + ); + if !nested { + write!(self.out, "{header}")?; + } + + if func.result.is_none() { + write!(self.out, "void")?; + } else { + let result = func.result.as_ref().unwrap(); // Write typedef if return type is an array let array_return_type = match module.types[result.ty].inner { TypeInner::Array { base, size, .. } => { @@ -1567,38 +1638,65 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } } - } else { - write!(self.out, "void")?; } + let nested_name = if nested { + self.namer.call(&format!("_{name}")) + } else { + name.to_string() + }; + // Write function name - write!(self.out, " {name}(")?; + write!(self.out, " {nested_name}(")?; let need_workgroup_variables_initialization = self.need_workgroup_variables_initialization(func_ctx, module); + let mut any_args_written = false; + let mut separator = || { + if any_args_written { + ", " + } else { + any_args_written = true; + "" + } + }; + // Write function arguments for non entry point functions match func_ctx.ty { back::FunctionType::Function(handle) => { for (index, arg) in func.arguments.iter().enumerate() { - if index != 0 { - write!(self.out, ", ")?; - } - + write!(self.out, "{}", separator())?; self.write_function_argument(module, handle, arg, index)?; } + // If this reads a task payload variable the variable needs to be passed as an `in` argument + for (var_handle, var) in module.global_variables.iter() { + let uses = info[var_handle]; + if uses.contains(valid::GlobalUse::READ) + && !uses.contains(valid::GlobalUse::WRITE) + && var.space == crate::AddressSpace::TaskPayload + { + self.function_task_payload_var.insert(handle, var_handle); + write!(self.out, "{}in ", separator())?; + + self.write_type(module, var.ty)?; + let name = &self.names[&NameKey::GlobalVariable(var_handle)]; + write!(self.out, " {name}")?; + break; + } + } } back::FunctionType::EntryPoint(ep_index) => { + let ep = &module.entry_points[ep_index as usize]; if let Some(ref ep_input) = self.entry_point_io.get(&(ep_index as usize)).unwrap().input { write!(self.out, "{} {}", ep_input.ty_name, ep_input.arg_name)?; + separator(); } else { - let stage = module.entry_points[ep_index as usize].stage; + let stage = ep.stage; for (index, arg) in func.arguments.iter().enumerate() { - if index != 0 { - write!(self.out, ", ")?; - } + write!(self.out, "{}", separator())?; self.write_type(module, arg.ty)?; let argument_name = @@ -1612,18 +1710,21 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_semantic(&arg.binding, Some((stage, Io::Input)))?; } } - if need_workgroup_variables_initialization { - if self - .entry_point_io - .get(&(ep_index as usize)) - .unwrap() - .input - .is_some() - || !func.arguments.is_empty() - { - write!(self.out, ", ")?; + if ep.stage == ShaderStage::Mesh { + if let Some(var_handle) = ep.task_payload { + let var = &module.global_variables[var_handle]; + write!(self.out, "{}in ", separator())?; + self.write_type(module, var.ty)?; + let arg_name = &self.names[&NameKey::GlobalVariable(var_handle)]; + write!(self.out, " {arg_name}")?; + if let TypeInner::Array { base, size, .. } = module.types[var.ty].inner { + self.write_array_size(module, base, size)?; + } } - write!(self.out, "uint3 __local_invocation_id : SV_GroupThreadID")?; + } + if need_workgroup_variables_initialization && !nested { + write!(self.out, "{}uint __local_invocation_index", separator())?; + write!(self.out, " : SV_GroupIndex")?; } } } @@ -1642,8 +1743,23 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out)?; writeln!(self.out, "{{")?; - if need_workgroup_variables_initialization { - self.write_workgroup_variables_initialization(func_ctx, module)?; + if need_workgroup_variables_initialization && !nested { + let back::FunctionType::EntryPoint(index) = func_ctx.ty else { + unreachable!(); + }; + writeln!( + self.out, + "{}if (all(__local_invocation_index == 0)) {{", + back::INDENT + )?; + self.write_workgroup_variables_initialization( + func_ctx, + module, + module.entry_points[index as usize].stage, + )?; + + writeln!(self.out, "{}}}", back::INDENT)?; + self.write_control_barrier(crate::Barrier::WORK_GROUP, back::Level(1))?; } if let back::FunctionType::EntryPoint(index) = func_ctx.ty { @@ -1694,6 +1810,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out, "}}")?; + if nested { + self.write_nested_function_outer( + module, + func_ctx, + &header, + name, + need_workgroup_variables_initialization, + &nested_name, + ep.unwrap(), + )?; + } + self.named_expressions.clear(); Ok(()) @@ -1775,35 +1903,31 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.options.zero_initialize_workgroup_memory && func_ctx.ty.is_compute_like_entry_point(module) && module.global_variables.iter().any(|(handle, var)| { - !func_ctx.info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup + !func_ctx.info[handle].is_empty() && var.space.is_workgroup_like() }) } - fn write_workgroup_variables_initialization( + pub(super) fn write_workgroup_variables_initialization( &mut self, func_ctx: &back::FunctionCtx, module: &Module, + stage: ShaderStage, ) -> BackendResult { - let level = back::Level(1); - - writeln!( - self.out, - "{level}if (all(__local_invocation_id == uint3(0u, 0u, 0u))) {{" - )?; - let vars = module.global_variables.iter().filter(|&(handle, var)| { - !func_ctx.info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup + // Read-only in mesh shaders + let task_needs_zero = + (var.space == crate::AddressSpace::TaskPayload) && stage == ShaderStage::Task; + !func_ctx.info[handle].is_empty() + && (var.space == crate::AddressSpace::WorkGroup || task_needs_zero) }); for (handle, var) in vars { let name = &self.names[&NameKey::GlobalVariable(handle)]; - write!(self.out, "{}{} = ", level.next(), name)?; + write!(self.out, "{}{} = ", back::Level(2), name)?; self.write_default_init(module, var.ty)?; writeln!(self.out, ";")?; } - - writeln!(self.out, "{level}}}")?; - self.write_control_barrier(crate::Barrier::WORK_GROUP, level) + Ok(()) } /// Helper method used to write switches @@ -2413,6 +2537,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result, } => { write!(self.out, "{level}")?; + if let Some(expr) = result { write!(self.out, "const ")?; let name = Baked(expr).to_string(); @@ -2436,13 +2561,25 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } let func_name = &self.names[&NameKey::Function(function)]; write!(self.out, "{func_name}(")?; - for (index, argument) in arguments.iter().enumerate() { - if index != 0 { - write!(self.out, ", ")?; + let mut any_args_written = false; + let mut separator = || { + if any_args_written { + ", " + } else { + any_args_written = true; + "" } + }; + for argument in arguments { + write!(self.out, "{}", separator())?; self.write_expr(module, *argument, func_ctx)?; } - writeln!(self.out, ");")? + if let Some(&var) = self.function_task_payload_var.get(&function) { + let name = &self.names[&NameKey::GlobalVariable(var)]; + // Pass it through directly, whether its an in variable to this function or the global variable + write!(self.out, "{}{name}", separator())?; + } + writeln!(self.out, ");")?; } Statement::Atomic { pointer, @@ -4475,7 +4612,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Ok(()) } - fn write_control_barrier( + pub(super) fn write_control_barrier( &mut self, barrier: crate::Barrier, level: back::Level, diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs index 91fca9e42b3..80d9176c7bb 100644 --- a/naga/src/back/mod.rs +++ b/naga/src/back/mod.rs @@ -128,6 +128,7 @@ fn get_entry_points( /// [`EntryPoint`]: crate::EntryPoint /// [`Module`]: crate::Module /// [`Module::entry_points`]: crate::Module::entry_points +#[derive(Clone, Copy, Debug)] pub enum FunctionType { /// A regular function. Function(crate::Handle), @@ -371,3 +372,11 @@ pub enum RayIntersectionType { Triangle = 1, BoundingBox = 4, } + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] +pub struct TaskRuntimeLimits { + pub max_mesh_workgroups_per_dim: u32, + pub max_mesh_workgroups_total: u32, +} diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index aa4737281fb..a6431e37eaf 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -237,12 +237,11 @@ impl Writer { ir_result: &crate::FunctionResult, result_members: &[ResultMember], body: &mut Vec, - task_payload: Option, ) -> Result { for (index, res_member) in result_members.iter().enumerate() { // This isn't a real builtin, and is handled elsewhere if res_member.built_in == Some(crate::BuiltIn::MeshTaskSize) { - continue; + return Ok(Instruction::return_value(value_id)); } let member_value_id = match ir_result.binding { Some(_) => value_id, @@ -274,13 +273,7 @@ impl Writer { _ => {} } } - self.try_write_entry_point_task_return( - value_id, - ir_result, - result_members, - body, - task_payload, - ) + Ok(Instruction::return_void()) } } @@ -3754,7 +3747,6 @@ impl BlockContext<'_> { self.ir_function.result.as_ref().unwrap(), &context.results, &mut block.body, - context.task_payload_variable_id, )?, None => Instruction::return_value(value_id), }; @@ -3762,18 +3754,7 @@ impl BlockContext<'_> { return Ok(BlockExitDisposition::Discarded); } Statement::Return { value: None } => { - if let Some(super::EntryPointContext { - mesh_state: Some(ref mesh_state), - .. - }) = self.function.entry_point_context - { - self.function.consume( - block, - Instruction::branch(mesh_state.entry_point_epilogue_id), - ); - } else { - self.function.consume(block, Instruction::return_void()); - } + self.function.consume(block, Instruction::return_void()); return Ok(BlockExitDisposition::Discarded); } Statement::Kill => { @@ -4241,16 +4222,6 @@ impl BlockContext<'_> { LoopContext::default(), debug_info, )?; - if let Some(super::EntryPointContext { - mesh_state: Some(ref mesh_state), - .. - }) = self.function.entry_point_context - { - let mut block = Block::new(mesh_state.entry_point_epilogue_id); - self.writer - .write_mesh_shader_return(mesh_state, &mut block)?; - self.function.consume(block, Instruction::return_void()); - } Ok(()) } diff --git a/naga/src/back/spv/mesh_shader.rs b/naga/src/back/spv/mesh_shader.rs index a026c763af7..d0c08895aa9 100644 --- a/naga/src/back/spv/mesh_shader.rs +++ b/naga/src/back/spv/mesh_shader.rs @@ -4,7 +4,7 @@ use spirv::Word; use crate::{ back::spv::{ helpers::BindingDecorations, writer::FunctionInterface, Block, EntryPointContext, Error, - Instruction, ResultMember, WriterFlags, + Instruction, WriterFlags, }, non_max_u32::NonMaxU32, Handle, @@ -36,15 +36,9 @@ pub struct MeshReturnInfo { /// All members of the output variable struct type out_members: Vec, /// Id of the input variable for local invocation id - local_invocation_index_id: Word, + local_invocation_index_var_id: Word, /// Total workgroup size (product) workgroup_size: u32, - /// Variable to be used later when saving the output as a loop index - loop_counter_vertices: Word, - /// Variable to be used later when saving the output as a loop index - loop_counter_primitives: Word, - /// The id of the label to jump to when `return` is called - pub entry_point_epilogue_id: Word, /// Vertex-specific info vertex_info: PerOutputTypeMeshReturnInfo, @@ -55,16 +49,6 @@ pub struct MeshReturnInfo { } impl super::Writer { - pub(super) fn require_mesh_shaders(&mut self) -> Result<(), Error> { - self.use_extension("SPV_EXT_mesh_shader"); - self.require_any("Mesh Shaders", &[spirv::Capability::MeshShadingEXT])?; - let lang_version = self.lang_version(); - if lang_version.0 <= 1 && lang_version.1 < 4 { - return Err(Error::SpirvVersionTooLow(1, 4)); - } - Ok(()) - } - /// Sets up an output variable that will handle part of the mesh shader output pub(super) fn write_mesh_return_global_variable( &mut self, @@ -88,7 +72,6 @@ impl super::Writer { iface: &mut FunctionInterface, local_invocation_index_id: Option, ir_module: &crate::Module, - prelude: &mut Block, ep_context: &mut EntryPointContext, ) -> Result<(), Error> { let Some(ref mesh_info) = iface.mesh_info else { @@ -146,7 +129,7 @@ impl super::Writer { _ => unreachable!(), }; // In the final return, we do a giant memcpy, for which this is helpful - let local_invocation_index_id = match local_invocation_index_id { + let local_invocation_index_var_id = match local_invocation_index_id { Some(a) => a, None => { let u32_id = self.get_u32_type_id(); @@ -166,48 +149,17 @@ impl super::Writer { .to_words(&mut self.logical_layout.annotations); iface.varying_ids.push(var); - let loaded_value = self.id_gen.next(); - prelude - .body - .push(Instruction::load(u32_id, loaded_value, var, None)); - loaded_value + var } }; - let u32_id = self.get_u32_type_id(); - // A general function variable that we guarantee to allow in the final return. It must be - // declared at the top of the function. Currently it is used in the memcpy part to keep - // track of the current index to copy. - let loop_counter_1 = self.id_gen.next(); - let loop_counter_2 = self.id_gen.next(); - prelude.body.insert( - 0, - Instruction::variable( - self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), - loop_counter_1, - spirv::StorageClass::Function, - None, - ), - ); - prelude.body.insert( - 1, - Instruction::variable( - self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), - loop_counter_2, - spirv::StorageClass::Function, - None, - ), - ); // This is the information that is passed to the function writer // so that it can write the final return logic let mut mesh_return_info = MeshReturnInfo { out_variable_id: self.global_variables[mesh_info.output_variable].var_id, out_members, - local_invocation_index_id, + local_invocation_index_var_id, workgroup_size: self .get_constant_scalar(crate::Literal::U32(iface.workgroup_size.iter().product())), - loop_counter_vertices: loop_counter_1, - loop_counter_primitives: loop_counter_2, - entry_point_epilogue_id: self.id_gen.next(), vertex_info: PerOutputTypeMeshReturnInfo { array_type_id: vertex_array_type_id, @@ -457,60 +409,33 @@ impl super::Writer { Ok(()) } - pub(super) fn try_write_entry_point_task_return( + pub(super) fn write_entry_point_task_return( &mut self, value_id: Word, - ir_result: &crate::FunctionResult, - result_members: &[ResultMember], body: &mut Vec, - task_payload: Option, + task_payload: Word, ) -> Result { // OpEmitMeshTasksEXT must be called right before exiting (after setting other // output variables if there are any) - for (index, res_member) in result_members.iter().enumerate() { - if res_member.built_in == Some(crate::BuiltIn::MeshTaskSize) { - self.write_control_barrier(crate::Barrier::WORK_GROUP, body); - // If its a function like `fn a() -> @builtin(...) vec3 ...` - // then just use the output value. If it's a struct, extract the - // value from the struct. - let member_value_id = match ir_result.binding { - Some(_) => value_id, - None => { - let member_value_id = self.id_gen.next(); - body.push(Instruction::composite_extract( - res_member.type_id, - member_value_id, - value_id, - &[index as Word], - )); - member_value_id - } - }; - // Extract the vec3 into 3 u32's - let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; - for (i, &value) in values.iter().enumerate() { - let instruction = Instruction::composite_extract( - self.get_u32_type_id(), - value, - member_value_id, - &[i as Word], - ); - body.push(instruction); - } - // TODO: make this guaranteed to be uniform - let mut instruction = Instruction::new(spirv::Op::EmitMeshTasksEXT); - for id in values { - instruction.add_operand(id); - } - // We have to include the task payload in our call - if let Some(task_payload) = task_payload { - instruction.add_operand(task_payload); - } - return Ok(instruction); - } + // Extract the vec3 into 3 u32's + let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, &value) in values.iter().enumerate() { + let instruction = Instruction::composite_extract( + self.get_u32_type_id(), + value, + value_id, + &[i as Word], + ); + body.push(instruction); + } + let mut instruction = Instruction::new(spirv::Op::EmitMeshTasksEXT); + for id in values { + instruction.add_operand(id); } - Ok(Instruction::return_void()) + // We have to include the task payload in our call + instruction.add_operand(task_payload); + Ok(instruction) } /// This writes the actual loop @@ -708,9 +633,10 @@ impl super::Writer { &mut self, return_info: &MeshReturnInfo, block: &mut Block, + loop_counter_vertices: u32, + loop_counter_primitives: u32, + local_invocation_index_id: Word, ) -> Result<(), Error> { - // Start with a control barrier so that everything that follows is guaranteed to see the same variables - self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); let u32_id = self.get_u32_type_id(); // Load the actual vertex and primitive counts @@ -780,8 +706,6 @@ impl super::Writer { return_info.primitive_info.array_type_id, ); - self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); - // This must be called exactly once before any other mesh outputs are written { let mut ins = Instruction::new(spirv::Op::SetMeshOutputsEXT); @@ -798,8 +722,8 @@ impl super::Writer { let func_end = self.id_gen.next(); block.body.push(Instruction::store( - return_info.loop_counter_vertices, - return_info.local_invocation_index_id, + loop_counter_vertices, + local_invocation_index_id, None, )); block.body.push(Instruction::branch(vertex_loop_header)); @@ -807,7 +731,7 @@ impl super::Writer { let vertex_copy_body = self.write_mesh_copy_body( false, return_info, - return_info.loop_counter_vertices, + loop_counter_vertices, vert_array_ptr, prim_array_ptr, ); @@ -818,7 +742,7 @@ impl super::Writer { vertex_loop_header, in_between_loops, vert_count_id, - return_info.loop_counter_vertices, + loop_counter_vertices, return_info, ); @@ -827,8 +751,8 @@ impl super::Writer { block.body.push(Instruction::label(in_between_loops)); block.body.push(Instruction::store( - return_info.loop_counter_primitives, - return_info.local_invocation_index_id, + loop_counter_primitives, + local_invocation_index_id, None, )); @@ -837,7 +761,7 @@ impl super::Writer { let primitive_copy_body = self.write_mesh_copy_body( true, return_info, - return_info.loop_counter_primitives, + loop_counter_primitives, vert_array_ptr, prim_array_ptr, ); @@ -848,11 +772,211 @@ impl super::Writer { prim_loop_header, func_end, prim_count_id, - return_info.loop_counter_primitives, + loop_counter_primitives, return_info, ); block.body.push(Instruction::label(func_end)); Ok(()) } + + pub(super) fn write_mesh_shader_wrapper( + &mut self, + return_info: &MeshReturnInfo, + inner_id: u32, + ) -> Result { + let out_id = self.id_gen.next(); + let mut function = super::Function::default(); + let lookup_function_type = super::LookupFunctionType { + parameter_type_ids: alloc::vec![], + return_type_id: self.void_type, + }; + let function_type = self.get_function_type(lookup_function_type); + function.signature = Some(Instruction::function( + self.void_type, + out_id, + spirv::FunctionControl::empty(), + function_type, + )); + let u32_id = self.get_u32_type_id(); + { + let mut block = Block::new(self.id_gen.next()); + // A general function variable that we guarantee to allow in the final return. It must be + // declared at the top of the function. Currently it is used in the memcpy part to keep + // track of the current index to copy. + let loop_counter_vertices = self.id_gen.next(); + let loop_counter_primitives = self.id_gen.next(); + block.body.insert( + 0, + Instruction::variable( + self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), + loop_counter_vertices, + spirv::StorageClass::Function, + None, + ), + ); + block.body.insert( + 1, + Instruction::variable( + self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), + loop_counter_primitives, + spirv::StorageClass::Function, + None, + ), + ); + let local_invocation_index_id = self.id_gen.next(); + block.body.push(Instruction::load( + u32_id, + local_invocation_index_id, + return_info.local_invocation_index_var_id, + None, + )); + block.body.push(Instruction::function_call( + self.void_type, + self.id_gen.next(), + inner_id, + &[], + )); + self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); + self.write_mesh_shader_return( + return_info, + &mut block, + loop_counter_vertices, + loop_counter_primitives, + local_invocation_index_id, + )?; + function.consume(block, Instruction::return_void()); + } + function.to_words(&mut self.logical_layout.function_definitions); + Ok(out_id) + } + + pub(super) fn write_task_shader_wrapper( + &mut self, + task_payload: Word, + inner_id: u32, + ) -> Result { + let out_id = self.id_gen.next(); + let mut function = super::Function::default(); + let lookup_function_type = super::LookupFunctionType { + parameter_type_ids: alloc::vec![], + return_type_id: self.void_type, + }; + let function_type = self.get_function_type(lookup_function_type); + function.signature = Some(Instruction::function( + self.void_type, + out_id, + spirv::FunctionControl::empty(), + function_type, + )); + + { + let mut block = Block::new(self.id_gen.next()); + let result = self.id_gen.next(); + block.body.push(Instruction::function_call( + self.get_vec3u_type_id(), + result, + inner_id, + &[], + )); + self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); + let final_value = if let Some(task_limits) = self.task_runtime_limits { + let zero_u32 = self.get_constant_scalar(crate::Literal::U32(0)); + // If its greater than 2<<21 then overflow is possible without being caught + let max_per_dim = self.get_constant_scalar(crate::Literal::U64( + task_limits.max_mesh_workgroups_per_dim.min(2 << 21) as u64, + )); + let max_total = self.get_constant_scalar(crate::Literal::U64( + task_limits.max_mesh_workgroups_total as u64, + )); + let u64_type_id = self + .get_numeric_type_id(crate::back::spv::NumericType::Scalar(crate::Scalar::U64)); + let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, value) in values.into_iter().enumerate() { + let u32_val = self.id_gen.next(); + block.body.push(Instruction::composite_extract( + self.get_u32_type_id(), + u32_val, + result, + &[i as u32], + )); + block.body.push(Instruction::unary( + spirv::Op::UConvert, + u64_type_id, + value, + u32_val, + )); + } + let prod_1 = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IMul, + u64_type_id, + prod_1, + values[0], + values[1], + )); + let prod = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IMul, + u64_type_id, + prod, + prod_1, + values[2], + )); + let total_too_large = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::UGreaterThanEqual, + self.get_bool_type_id(), + total_too_large, + prod, + max_total, + )); + + let too_large = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, value) in values.into_iter().enumerate() { + block.body.push(Instruction::binary( + spirv::Op::UGreaterThanEqual, + self.get_bool_type_id(), + too_large[i], + value, + max_per_dim, + )); + } + let mut current = total_too_large; + for is_too_large in too_large { + let new = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::LogicalOr, + self.get_bool_type_id(), + new, + current, + is_too_large, + )); + current = new; + } + let zero_vec3 = self.id_gen.next(); + block.body.push(Instruction::composite_construct( + self.get_vec3u_type_id(), + zero_vec3, + &[zero_u32, zero_u32, zero_u32], + )); + let final_result = self.id_gen.next(); + block.body.push(Instruction::select( + self.get_vec3u_type_id(), + final_result, + current, + zero_vec3, + result, + )); + final_result + } else { + result + }; + let ins = + self.write_entry_point_task_return(final_value, &mut block.body, task_payload)?; + function.consume(block, ins); + } + function.to_words(&mut self.logical_layout.function_definitions); + Ok(out_id) + } } diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 504575a2e31..e97fc231a7c 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -120,6 +120,7 @@ use spirv::Word; use thiserror::Error; use crate::arena::{Handle, HandleVec}; +use crate::back::TaskRuntimeLimits; use crate::proc::{BoundsCheckPolicies, TypeResolution}; #[derive(Clone)] @@ -967,6 +968,8 @@ pub struct Writer { /// Non semantic debug printf extension `OpExtInstImport` debug_printf: Option, pub(crate) ray_query_initialization_tracking: bool, + + task_runtime_limits: Option, } bitflags::bitflags! { @@ -1084,6 +1087,8 @@ pub struct Options<'a> { pub use_storage_input_output_16: bool, pub debug_info: Option>, + + pub task_runtime_limits: Option, } impl Default for Options<'_> { @@ -1106,6 +1111,7 @@ impl Default for Options<'_> { ray_query_initialization_tracking: true, use_storage_input_output_16: true, debug_info: None, + task_runtime_limits: None, } } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 094aaed1a93..c3297cc5d56 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -114,6 +114,7 @@ impl Writer { options.use_storage_input_output_16, ), debug_printf: None, + task_runtime_limits: options.task_runtime_limits, }) } @@ -132,6 +133,7 @@ impl Writer { self.binding_map = options.binding_map.clone(); self.io_f16_polyfills = super::f16_polyfill::F16IoPolyfill::new(options.use_storage_input_output_16); + self.task_runtime_limits = options.task_runtime_limits; Ok(()) } @@ -170,6 +172,7 @@ impl Writer { capabilities_available: take(&mut self.capabilities_available), fake_missing_bindings: self.fake_missing_bindings, binding_map: take(&mut self.binding_map), + task_runtime_limits: self.task_runtime_limits, // Initialized afresh: id_gen, @@ -408,14 +411,6 @@ impl Writer { self.get_pointer_type_id(vec2u_id, class) } - pub(super) fn get_vec3u_pointer_type_id(&mut self, class: spirv::StorageClass) -> Word { - let vec3u_id = self.get_numeric_type_id(NumericType::Vector { - size: crate::VectorSize::Tri, - scalar: crate::Scalar::U32, - }); - self.get_pointer_type_id(vec3u_id, class) - } - pub(super) fn get_bool_type_id(&mut self) -> Word { self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::BOOL)) } @@ -1195,10 +1190,9 @@ impl Writer { mesh_state: None, }; - let mut local_invocation_id = None; - let mut parameter_type_ids = Vec::with_capacity(ir_function.arguments.len()); + let mut local_invocation_index_var_id = None; let mut local_invocation_index_id = None; for argument in ir_function.arguments.iter() { @@ -1228,13 +1222,9 @@ impl Writer { varying_id, argument_type_id, ); - - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); - } else if binding - == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) - { + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { local_invocation_index_id = Some(id); + local_invocation_index_var_id = Some(varying_id); } id @@ -1259,13 +1249,10 @@ impl Writer { let id = self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id); constituent_ids.push(id); - - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); - } else if binding - == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { local_invocation_index_id = Some(id); + local_invocation_index_var_id = Some(varying_id); } } prelude.body.push(Instruction::composite_construct( @@ -1393,7 +1380,11 @@ impl Writer { .body .push(Instruction::store(varying_id, default_value_id, None)); } - self.void_type + if iface.stage == crate::ShaderStage::Task { + self.get_vec3u_type_id() + } else { + self.void_type + } } else { self.get_handle_type_id(result.ty) } @@ -1409,9 +1400,8 @@ impl Writer { } self.write_entry_point_mesh_shader_info( iface, - local_invocation_index_id, + local_invocation_index_var_id, ir_module, - &mut prelude, &mut ep_context, )?; } @@ -1654,7 +1644,7 @@ impl Writer { next_id, ir_module, info, - local_invocation_id, + local_invocation_index_id, interface, context.function, ), @@ -1679,7 +1669,21 @@ impl Writer { function.to_words(&mut self.logical_layout.function_definitions); - Ok(function_id) + if let Some(EntryPointContext { + mesh_state: Some(ref mesh_state), + .. + }) = function.entry_point_context + { + self.write_mesh_shader_wrapper(mesh_state, function_id) + } else if let Some(EntryPointContext { + task_payload_variable_id: Some(tp), + .. + }) = function.entry_point_context + { + self.write_task_shader_wrapper(tp, function_id) + } else { + Ok(function_id) + } } fn write_execution_mode( @@ -1702,6 +1706,7 @@ impl Writer { debug_info: &Option, ) -> Result { let mut interface_ids = Vec::new(); + let function_id = self.write_function( &entry_point.function, info, @@ -2701,7 +2706,7 @@ impl Writer { entry_id: Word, ir_module: &crate::Module, info: &FunctionInfo, - local_invocation_id: Option, + local_invocation_index: Option, interface: &mut FunctionInterface, function: &mut Function, ) -> Option { @@ -2709,7 +2714,10 @@ impl Writer { .global_variables .iter() .filter(|&(handle, var)| { - !info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup + let task_exception = (var.space == crate::AddressSpace::TaskPayload) + && interface.stage == crate::ShaderStage::Task; + !info[handle].is_empty() + && (var.space == crate::AddressSpace::WorkGroup || task_exception) }) .map(|(handle, var)| { // It's safe to use `var_id` here, not `access_id`, because only @@ -2726,16 +2734,15 @@ impl Writer { return None; } - let uint3_type_id = self.get_vec3u_type_id(); - let mut pre_if_block = Block::new(entry_id); - let local_invocation_id = if let Some(local_invocation_id) = local_invocation_id { - local_invocation_id + let local_invocation_index = if let Some(local_invocation_index) = local_invocation_index { + local_invocation_index } else { let varying_id = self.id_gen.next(); let class = spirv::StorageClass::Input; - let pointer_type_id = self.get_vec3u_pointer_type_id(class); + let u32_ty_id = self.get_u32_type_id(); + let pointer_type_id = self.get_pointer_type_id(u32_ty_id, class); Instruction::variable(pointer_type_id, varying_id, class, None) .to_words(&mut self.logical_layout.declarations); @@ -2743,39 +2750,29 @@ impl Writer { self.decorate( varying_id, spirv::Decoration::BuiltIn, - &[spirv::BuiltIn::LocalInvocationId as u32], + &[spirv::BuiltIn::LocalInvocationIndex as u32], ); interface.varying_ids.push(varying_id); let id = self.id_gen.next(); pre_if_block .body - .push(Instruction::load(uint3_type_id, id, varying_id, None)); + .push(Instruction::load(u32_ty_id, id, varying_id, None)); id }; - let zero_id = self.get_constant_null(uint3_type_id); - let bool3_type_id = self.get_vec3_bool_type_id(); + let zero_id = self.get_constant_scalar(crate::Literal::U32(0)); let eq_id = self.id_gen.next(); pre_if_block.body.push(Instruction::binary( spirv::Op::IEqual, - bool3_type_id, + self.get_bool_type_id(), eq_id, - local_invocation_id, + local_invocation_index, zero_id, )); - let condition_id = self.id_gen.next(); - let bool_type_id = self.get_bool_type_id(); - pre_if_block.body.push(Instruction::relational( - spirv::Op::All, - bool_type_id, - condition_id, - eq_id, - )); - let merge_id = self.id_gen.next(); pre_if_block.body.push(Instruction::selection_merge( merge_id, @@ -2785,7 +2782,7 @@ impl Writer { let accept_id = self.id_gen.next(); function.consume( pre_if_block, - Instruction::branch_conditional(condition_id, accept_id, merge_id), + Instruction::branch_conditional(eq_id, accept_id, merge_id), ); let accept_block = Block { @@ -3010,7 +3007,6 @@ impl Writer { } if per_primitive && stage == crate::ShaderStage::Fragment { others.push(Decoration::PerPrimitiveEXT); - self.require_mesh_shaders()?; } Ok(BindingDecorations::Location { location, @@ -3022,13 +3018,6 @@ impl Writer { use crate::BuiltIn as Bi; let mut others = ArrayVec::new(); - if matches!( - built_in, - Bi::CullPrimitive | Bi::PointIndex | Bi::LineIndices | Bi::TriangleIndices - ) { - self.require_mesh_shaders()?; - } - let built_in = match built_in { Bi::Position { invariant } => { if invariant { @@ -3140,22 +3129,12 @@ impl Writer { BuiltIn::SubgroupLocalInvocationId } Bi::CullPrimitive => { - self.require_mesh_shaders()?; others.push(Decoration::PerPrimitiveEXT); BuiltIn::CullPrimitiveEXT } - Bi::PointIndex => { - self.require_mesh_shaders()?; - BuiltIn::PrimitivePointIndicesEXT - } - Bi::LineIndices => { - self.require_mesh_shaders()?; - BuiltIn::PrimitiveLineIndicesEXT - } - Bi::TriangleIndices => { - self.require_mesh_shaders()?; - BuiltIn::PrimitiveTriangleIndicesEXT - } + Bi::PointIndex => BuiltIn::PrimitivePointIndicesEXT, + Bi::LineIndices => BuiltIn::PrimitiveLineIndicesEXT, + Bi::TriangleIndices => BuiltIn::PrimitiveTriangleIndicesEXT, // No decoration, this EmitMeshTasksEXT is called at function return Bi::MeshTaskSize => return Ok(BindingDecorations::None), // These aren't normal builtins and don't occur in function output @@ -3517,17 +3496,6 @@ impl Writer { | ir_module.special_types.ray_intersection.is_some(); let has_vertex_return = ir_module.special_types.ray_vertex_return.is_some(); - // Ways mesh shaders are required: - // * Mesh entry point used - checked for - // * Mesh function like setVertex used outside mesh entry point, this is handled when those are written - // * Fragment shader with per primitive data - handled in `map_binding` - let has_mesh_shaders = ir_module.entry_points.iter().any(|entry| { - entry.stage == crate::ShaderStage::Mesh || entry.stage == crate::ShaderStage::Task - }) || ir_module - .global_variables - .iter() - .any(|gvar| gvar.1.space == crate::AddressSpace::TaskPayload); - for (_, &crate::Type { ref inner, .. }) in ir_module.types.iter() { // spirv does not know whether these have vertex return - that is done by us if let &crate::TypeInner::AccelerationStructure { .. } @@ -3554,8 +3522,13 @@ impl Writer { Instruction::extension("SPV_KHR_ray_tracing_position_fetch") .to_words(&mut self.logical_layout.extensions); } - if has_mesh_shaders { - self.require_mesh_shaders()?; + if ir_module.uses_mesh_shaders() { + self.use_extension("SPV_EXT_mesh_shader"); + self.require_any("Mesh Shaders", &[spirv::Capability::MeshShadingEXT])?; + let lang_version = self.lang_version(); + if lang_version.0 <= 1 && lang_version.1 < 4 { + return Err(Error::SpirvVersionTooLow(1, 4)); + } } Instruction::type_void(self.void_type).to_words(&mut self.logical_layout.declarations); Instruction::ext_inst_import(self.gl450_ext_inst_id, "GLSL.std.450") diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 7976a024ab2..81259e4ed3f 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -278,7 +278,6 @@ impl Writer { let mut needs_f16 = false; let mut needs_dual_source_blending = false; let mut needs_clip_distances = false; - let mut needs_mesh_shaders = false; // Determine which `enable` declarations are needed for (_, ty) in module.types.iter() { @@ -299,25 +298,6 @@ impl Writer { crate::Binding::BuiltIn(crate::BuiltIn::ClipDistance) => { needs_clip_distances = true; } - crate::Binding::Location { - per_primitive: true, - .. - } => { - needs_mesh_shaders = true; - } - crate::Binding::BuiltIn( - crate::BuiltIn::MeshTaskSize - | crate::BuiltIn::CullPrimitive - | crate::BuiltIn::PointIndex - | crate::BuiltIn::LineIndices - | crate::BuiltIn::TriangleIndices - | crate::BuiltIn::VertexCount - | crate::BuiltIn::Vertices - | crate::BuiltIn::PrimitiveCount - | crate::BuiltIn::Primitives, - ) => { - needs_mesh_shaders = true; - } _ => {} } } @@ -326,22 +306,6 @@ impl Writer { } } - if module - .entry_points - .iter() - .any(|ep| matches!(ep.stage, ShaderStage::Mesh | ShaderStage::Task)) - { - needs_mesh_shaders = true; - } - - if module - .global_variables - .iter() - .any(|gv| gv.1.space == crate::AddressSpace::TaskPayload) - { - needs_mesh_shaders = true; - } - // Write required declarations let mut any_written = false; if needs_f16 { @@ -356,7 +320,7 @@ impl Writer { writeln!(self.out, "enable clip_distances;")?; any_written = true; } - if needs_mesh_shaders { + if module.uses_mesh_shaders() { writeln!(self.out, "enable wgpu_mesh_shader;")?; any_written = true; } diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 832abc9acaf..92c40eb8801 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -664,6 +664,14 @@ impl super::ShaderStage { Self::Compute | Self::Task | Self::Mesh => true, } } + + /// Mesh or task shader + pub const fn mesh_like(self) -> bool { + match self { + Self::Task | Self::Mesh => true, + Self::Vertex | Self::Fragment | Self::Compute => false, + } + } } #[test] @@ -853,4 +861,64 @@ impl crate::Module { .map(|a| a.with_span_handle(self.global_variables[gv].ty, &self.types)), ) } + + pub fn uses_mesh_shaders(&self) -> bool { + for (_, ty) in self.types.iter() { + match ty.inner { + crate::TypeInner::Struct { ref members, .. } => { + for member in members { + if matches!( + member.binding, + Some(crate::Binding::BuiltIn( + crate::BuiltIn::MeshTaskSize + | crate::BuiltIn::CullPrimitive + | crate::BuiltIn::PointIndex + | crate::BuiltIn::LineIndices + | crate::BuiltIn::TriangleIndices + | crate::BuiltIn::VertexCount + | crate::BuiltIn::Vertices + | crate::BuiltIn::PrimitiveCount + | crate::BuiltIn::Primitives, + )) + ) { + return true; + } + } + } + _ => (), + } + } + if self.entry_points.iter().any(|ep| { + matches!( + ep.stage, + crate::ShaderStage::Mesh | crate::ShaderStage::Task + ) + }) { + return true; + } + if self + .global_variables + .iter() + .any(|gv| gv.1.space == crate::AddressSpace::TaskPayload) + { + return true; + } + false + } +} + +impl crate::MeshOutputTopology { + pub const fn to_builtin(self) -> crate::BuiltIn { + match self { + Self::Points => crate::BuiltIn::PointIndex, + Self::Lines => crate::BuiltIn::LineIndices, + Self::Triangles => crate::BuiltIn::TriangleIndices, + } + } +} + +impl crate::AddressSpace { + pub const fn is_workgroup_like(self) -> bool { + matches!(self, Self::WorkGroup | Self::TaskPayload) + } } diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index d1ae0618902..c9d5ebabd25 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -270,7 +270,7 @@ pub struct FunctionInfo { /// `FunctionInfo` implements `core::ops::Index>`, /// so you can simply index this struct with a global handle to retrieve /// its usage information. - global_uses: Box<[GlobalUse]>, + pub global_uses: Box<[GlobalUse]>, /// Information about each expression in this function's body. /// diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index faa00478328..e5b1e9ed2af 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -1087,9 +1087,11 @@ impl super::Validator { } // Task shaders must have a single `MeshTaskSize` output, and nothing else. if ep.stage == crate::ShaderStage::Task { - let ok = result_built_ins.contains(&crate::BuiltIn::MeshTaskSize) - && result_built_ins.len() == 1 - && self.location_mask.is_empty(); + let ok = module.types[fr.ty].inner + == crate::TypeInner::Vector { + size: crate::VectorSize::Tri, + scalar: crate::Scalar::U32, + }; if !ok { return Err(EntryPointError::WrongTaskShaderEntryResult.with_span()); } diff --git a/naga/tests/in/wgsl/mesh-shader-empty.toml b/naga/tests/in/wgsl/mesh-shader-empty.toml index 148b7004995..24e00ffee54 100644 --- a/naga/tests/in/wgsl/mesh-shader-empty.toml +++ b/naga/tests/in/wgsl/mesh-shader-empty.toml @@ -1,6 +1,9 @@ capabilities = "MESH_SHADER" -targets = "WGSL | SPIRV" +targets = "WGSL | SPIRV | HLSL" [spv] version = [1, 4] capabilities = ["MeshShadingEXT"] + +[hlsl] +shader_model = "V6_5" diff --git a/naga/tests/in/wgsl/mesh-shader-empty.wgsl b/naga/tests/in/wgsl/mesh-shader-empty.wgsl index 98a6bf8448b..27ed9966117 100644 --- a/naga/tests/in/wgsl/mesh-shader-empty.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-empty.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader-lines.toml b/naga/tests/in/wgsl/mesh-shader-lines.toml index 148b7004995..24e00ffee54 100644 --- a/naga/tests/in/wgsl/mesh-shader-lines.toml +++ b/naga/tests/in/wgsl/mesh-shader-lines.toml @@ -1,6 +1,9 @@ capabilities = "MESH_SHADER" -targets = "WGSL | SPIRV" +targets = "WGSL | SPIRV | HLSL" [spv] version = [1, 4] capabilities = ["MeshShadingEXT"] + +[hlsl] +shader_model = "V6_5" diff --git a/naga/tests/in/wgsl/mesh-shader-lines.wgsl b/naga/tests/in/wgsl/mesh-shader-lines.wgsl index c475ff10619..c07bc2a7a53 100644 --- a/naga/tests/in/wgsl/mesh-shader-lines.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-lines.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader-points.wgsl b/naga/tests/in/wgsl/mesh-shader-points.wgsl index 84516ee8f2a..31429e2df55 100644 --- a/naga/tests/in/wgsl/mesh-shader-points.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-points.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader.toml b/naga/tests/in/wgsl/mesh-shader.toml index 148b7004995..1adabc9d454 100644 --- a/naga/tests/in/wgsl/mesh-shader.toml +++ b/naga/tests/in/wgsl/mesh-shader.toml @@ -1,6 +1,13 @@ capabilities = "MESH_SHADER" -targets = "WGSL | SPIRV" +targets = "WGSL | SPIRV | HLSL" [spv] version = [1, 4] capabilities = ["MeshShadingEXT"] + +[hlsl] +shader_model = "V6_5" + +[hlsl.task_runtime_limits] +max_mesh_workgroups_per_dim = 256 +max_mesh_workgroups_total = 1024 diff --git a/naga/tests/in/wgsl/mesh-shader.wgsl b/naga/tests/in/wgsl/mesh-shader.wgsl index ca2f9c911aa..9b57acd10a9 100644 --- a/naga/tests/in/wgsl/mesh-shader.wgsl +++ b/naga/tests/in/wgsl/mesh-shader.wgsl @@ -31,13 +31,21 @@ struct PrimitiveInput { var taskPayload: TaskPayload; var workgroupData: f32; +fn helper_reader() -> bool { + return taskPayload.visible; +} +fn helper_writer(value: bool) { + taskPayload.visible = value; +} + @task @payload(taskPayload) @workgroup_size(1) fn ts_main() -> @builtin(mesh_task_size) vec3 { workgroupData = 1.0; taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; + helper_writer(true); + taskPayload.visible = helper_reader(); return vec3(1, 1, 1); } @@ -45,8 +53,8 @@ fn ts_main() -> @builtin(mesh_task_size) vec3 { @task @payload(taskPayload) @workgroup_size(2) -fn ts_divergent(@builtin(local_invocation_index) thread_id: u32) -> @builtin(mesh_task_size) vec3 { - if thread_id == 0 { +fn ts_divergent(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); taskPayload.visible = true; return vec3(1, 1, 1); @@ -82,7 +90,7 @@ fn ms_main() { mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; + mesh_output.primitives[0].cull = !helper_reader(); mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); } @@ -109,9 +117,9 @@ fn ms_no_ts() { // See ts_divergent comment @mesh(mesh_output) -@workgroup_size(1) -fn ms_divergent(@builtin(local_invocation_index) thread_id: u32) { - if thread_id == 0 { +@workgroup_size(2) +fn ms_divergent(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { mesh_output.vertex_count = 3; mesh_output.primitive_count = 1; workgroupData = 2.0; diff --git a/naga/tests/naga/snapshots.rs b/naga/tests/naga/snapshots.rs index f203a884749..e8c8800e47e 100644 --- a/naga/tests/naga/snapshots.rs +++ b/naga/tests/naga/snapshots.rs @@ -335,7 +335,8 @@ fn write_output_hlsl( naga::ShaderStage::Vertex => &mut config.vertex, naga::ShaderStage::Fragment => &mut config.fragment, naga::ShaderStage::Compute => &mut config.compute, - naga::ShaderStage::Task | naga::ShaderStage::Mesh => unreachable!(), + naga::ShaderStage::Task => &mut config.task, + naga::ShaderStage::Mesh => &mut config.mesh, } .push(hlsl_snapshots::ConfigItem { entry_point: name.clone(), diff --git a/naga/tests/out/hlsl/spv-barrier.ron b/naga/tests/out/hlsl/spv-barrier.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/spv-barrier.ron +++ b/naga/tests/out/hlsl/spv-barrier.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-do-while.ron b/naga/tests/out/hlsl/spv-do-while.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/spv-do-while.ron +++ b/naga/tests/out/hlsl/spv-do-while.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-empty-global-name.ron b/naga/tests/out/hlsl/spv-empty-global-name.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/spv-empty-global-name.ron +++ b/naga/tests/out/hlsl/spv-empty-global-name.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-fetch_depth.ron b/naga/tests/out/hlsl/spv-fetch_depth.ron index 16eac451859..92f9edbb9cf 100644 --- a/naga/tests/out/hlsl/spv-fetch_depth.ron +++ b/naga/tests/out/hlsl/spv-fetch_depth.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-inv-hyperbolic-trig-functions.ron b/naga/tests/out/hlsl/spv-inv-hyperbolic-trig-functions.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/spv-inv-hyperbolic-trig-functions.ron +++ b/naga/tests/out/hlsl/spv-inv-hyperbolic-trig-functions.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-quad-vert.ron b/naga/tests/out/hlsl/spv-quad-vert.ron index 8240856a5c7..8657dfe11d6 100644 --- a/naga/tests/out/hlsl/spv-quad-vert.ron +++ b/naga/tests/out/hlsl/spv-quad-vert.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-subgroup-operations-s.ron b/naga/tests/out/hlsl/spv-subgroup-operations-s.ron index b973fe3da13..afc4b40f24d 100644 --- a/naga/tests/out/hlsl/spv-subgroup-operations-s.ron +++ b/naga/tests/out/hlsl/spv-subgroup-operations-s.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_0", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/spv-unnamed-gl-per-vertex.ron b/naga/tests/out/hlsl/spv-unnamed-gl-per-vertex.ron index 8240856a5c7..8657dfe11d6 100644 --- a/naga/tests/out/hlsl/spv-unnamed-gl-per-vertex.ron +++ b/naga/tests/out/hlsl/spv-unnamed-gl-per-vertex.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-6438-conflicting-idents.ron b/naga/tests/out/hlsl/wgsl-6438-conflicting-idents.ron index 393d3d67479..bb7c047d643 100644 --- a/naga/tests/out/hlsl/wgsl-6438-conflicting-idents.ron +++ b/naga/tests/out/hlsl/wgsl-6438-conflicting-idents.ron @@ -13,4 +13,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-6772-unpack-expr-accesses.ron b/naga/tests/out/hlsl/wgsl-6772-unpack-expr-accesses.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-6772-unpack-expr-accesses.ron +++ b/naga/tests/out/hlsl/wgsl-6772-unpack-expr-accesses.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-7995-unicode-idents.ron b/naga/tests/out/hlsl/wgsl-7995-unicode-idents.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-7995-unicode-idents.ron +++ b/naga/tests/out/hlsl/wgsl-7995-unicode-idents.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-abstract-types-return.ron b/naga/tests/out/hlsl/wgsl-abstract-types-return.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-abstract-types-return.ron +++ b/naga/tests/out/hlsl/wgsl-abstract-types-return.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-access.ron b/naga/tests/out/hlsl/wgsl-access.ron index d9ef3e5588b..ab50d868212 100644 --- a/naga/tests/out/hlsl/wgsl-access.ron +++ b/naga/tests/out/hlsl/wgsl-access.ron @@ -17,4 +17,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-aliased-ray-query.ron b/naga/tests/out/hlsl/wgsl-aliased-ray-query.ron index 7e97e82d38c..13a055d54fa 100644 --- a/naga/tests/out/hlsl/wgsl-aliased-ray-query.ron +++ b/naga/tests/out/hlsl/wgsl-aliased-ray-query.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_5", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-array-in-ctor.ron b/naga/tests/out/hlsl/wgsl-array-in-ctor.ron index 5c261e59b26..84bfcfec896 100644 --- a/naga/tests/out/hlsl/wgsl-array-in-ctor.ron +++ b/naga/tests/out/hlsl/wgsl-array-in-ctor.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-array-in-function-return-type.ron b/naga/tests/out/hlsl/wgsl-array-in-function-return-type.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/wgsl-array-in-function-return-type.ron +++ b/naga/tests/out/hlsl/wgsl-array-in-function-return-type.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicCompareExchange-int64.ron b/naga/tests/out/hlsl/wgsl-atomicCompareExchange-int64.ron index b7059590b48..430e723d08b 100644 --- a/naga/tests/out/hlsl/wgsl-atomicCompareExchange-int64.ron +++ b/naga/tests/out/hlsl/wgsl-atomicCompareExchange-int64.ron @@ -13,4 +13,8 @@ target_profile:"cs_6_6", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicCompareExchange.ron b/naga/tests/out/hlsl/wgsl-atomicCompareExchange.ron index 70f4fcb3d14..e9785b7bd17 100644 --- a/naga/tests/out/hlsl/wgsl-atomicCompareExchange.ron +++ b/naga/tests/out/hlsl/wgsl-atomicCompareExchange.ron @@ -13,4 +13,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicOps-int64-min-max.ron b/naga/tests/out/hlsl/wgsl-atomicOps-int64-min-max.ron index 67a90355123..e192e09a432 100644 --- a/naga/tests/out/hlsl/wgsl-atomicOps-int64-min-max.ron +++ b/naga/tests/out/hlsl/wgsl-atomicOps-int64-min-max.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_6", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicOps-int64.hlsl b/naga/tests/out/hlsl/wgsl-atomicOps-int64.hlsl index eaebd0f591f..8e6692d972a 100644 --- a/naga/tests/out/hlsl/wgsl-atomicOps-int64.hlsl +++ b/naga/tests/out/hlsl/wgsl-atomicOps-int64.hlsl @@ -30,9 +30,9 @@ groupshared int64_t workgroup_atomic_arr[2]; groupshared Struct workgroup_struct; [numthreads(2, 1, 1)] -void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID) +void cs_main(uint3 id : SV_GroupThreadID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { workgroup_atomic_scalar = (uint64_t)0; workgroup_atomic_arr = (int64_t[2])0; workgroup_struct = (Struct)0; diff --git a/naga/tests/out/hlsl/wgsl-atomicOps-int64.ron b/naga/tests/out/hlsl/wgsl-atomicOps-int64.ron index 67a90355123..e192e09a432 100644 --- a/naga/tests/out/hlsl/wgsl-atomicOps-int64.ron +++ b/naga/tests/out/hlsl/wgsl-atomicOps-int64.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_6", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicOps.hlsl b/naga/tests/out/hlsl/wgsl-atomicOps.hlsl index d32d953a18c..3378a0579de 100644 --- a/naga/tests/out/hlsl/wgsl-atomicOps.hlsl +++ b/naga/tests/out/hlsl/wgsl-atomicOps.hlsl @@ -21,9 +21,9 @@ groupshared int workgroup_atomic_arr[2]; groupshared Struct workgroup_struct; [numthreads(2, 1, 1)] -void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID) +void cs_main(uint3 id : SV_GroupThreadID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { workgroup_atomic_scalar = (uint)0; workgroup_atomic_arr = (int[2])0; workgroup_struct = (Struct)0; diff --git a/naga/tests/out/hlsl/wgsl-atomicOps.ron b/naga/tests/out/hlsl/wgsl-atomicOps.ron index 5c261e59b26..84bfcfec896 100644 --- a/naga/tests/out/hlsl/wgsl-atomicOps.ron +++ b/naga/tests/out/hlsl/wgsl-atomicOps.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicTexture-int64.ron b/naga/tests/out/hlsl/wgsl-atomicTexture-int64.ron index 67a90355123..e192e09a432 100644 --- a/naga/tests/out/hlsl/wgsl-atomicTexture-int64.ron +++ b/naga/tests/out/hlsl/wgsl-atomicTexture-int64.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_6", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-atomicTexture.ron b/naga/tests/out/hlsl/wgsl-atomicTexture.ron index 5c261e59b26..84bfcfec896 100644 --- a/naga/tests/out/hlsl/wgsl-atomicTexture.ron +++ b/naga/tests/out/hlsl/wgsl-atomicTexture.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-barycentrics.ron b/naga/tests/out/hlsl/wgsl-barycentrics.ron index 9ed6bdee58d..5e303e721c8 100644 --- a/naga/tests/out/hlsl/wgsl-barycentrics.ron +++ b/naga/tests/out/hlsl/wgsl-barycentrics.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-binding-arrays.ron b/naga/tests/out/hlsl/wgsl-binding-arrays.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/wgsl-binding-arrays.ron +++ b/naga/tests/out/hlsl/wgsl-binding-arrays.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-bitcast.ron b/naga/tests/out/hlsl/wgsl-bitcast.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-bitcast.ron +++ b/naga/tests/out/hlsl/wgsl-bitcast.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-bits.ron b/naga/tests/out/hlsl/wgsl-bits.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-bits.ron +++ b/naga/tests/out/hlsl/wgsl-bits.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-boids.ron b/naga/tests/out/hlsl/wgsl-boids.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-boids.ron +++ b/naga/tests/out/hlsl/wgsl-boids.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-bounds-check-dynamic-buffer.ron b/naga/tests/out/hlsl/wgsl-bounds-check-dynamic-buffer.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-bounds-check-dynamic-buffer.ron +++ b/naga/tests/out/hlsl/wgsl-bounds-check-dynamic-buffer.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-break-if.ron b/naga/tests/out/hlsl/wgsl-break-if.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-break-if.ron +++ b/naga/tests/out/hlsl/wgsl-break-if.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-collatz.ron b/naga/tests/out/hlsl/wgsl-collatz.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-collatz.ron +++ b/naga/tests/out/hlsl/wgsl-collatz.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-const-exprs.ron b/naga/tests/out/hlsl/wgsl-const-exprs.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-const-exprs.ron +++ b/naga/tests/out/hlsl/wgsl-const-exprs.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-constructors.ron b/naga/tests/out/hlsl/wgsl-constructors.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-constructors.ron +++ b/naga/tests/out/hlsl/wgsl-constructors.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-control-flow.ron b/naga/tests/out/hlsl/wgsl-control-flow.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-control-flow.ron +++ b/naga/tests/out/hlsl/wgsl-control-flow.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-conversion-float-to-int.ron b/naga/tests/out/hlsl/wgsl-conversion-float-to-int.ron index b973fe3da13..afc4b40f24d 100644 --- a/naga/tests/out/hlsl/wgsl-conversion-float-to-int.ron +++ b/naga/tests/out/hlsl/wgsl-conversion-float-to-int.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_0", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-conversions.ron b/naga/tests/out/hlsl/wgsl-conversions.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-conversions.ron +++ b/naga/tests/out/hlsl/wgsl-conversions.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-cross.ron b/naga/tests/out/hlsl/wgsl-cross.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-cross.ron +++ b/naga/tests/out/hlsl/wgsl-cross.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-dualsource.ron b/naga/tests/out/hlsl/wgsl-dualsource.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/wgsl-dualsource.ron +++ b/naga/tests/out/hlsl/wgsl-dualsource.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-empty-if.ron b/naga/tests/out/hlsl/wgsl-empty-if.ron index 03fe98f84cd..f6df37629d6 100644 --- a/naga/tests/out/hlsl/wgsl-empty-if.ron +++ b/naga/tests/out/hlsl/wgsl-empty-if.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-empty.ron b/naga/tests/out/hlsl/wgsl-empty.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-empty.ron +++ b/naga/tests/out/hlsl/wgsl-empty.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-f16.ron b/naga/tests/out/hlsl/wgsl-f16.ron index b396a4626e3..ea04bf30313 100644 --- a/naga/tests/out/hlsl/wgsl-f16.ron +++ b/naga/tests/out/hlsl/wgsl-f16.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_2", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-f64.ron b/naga/tests/out/hlsl/wgsl-f64.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-f64.ron +++ b/naga/tests/out/hlsl/wgsl-f64.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-fragment-output.ron b/naga/tests/out/hlsl/wgsl-fragment-output.ron index 9dfaf7393b0..ab90a7a9891 100644 --- a/naga/tests/out/hlsl/wgsl-fragment-output.ron +++ b/naga/tests/out/hlsl/wgsl-fragment-output.ron @@ -13,4 +13,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.ron b/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.ron index 81f3e9b2950..81fdf5a31b9 100644 --- a/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.ron +++ b/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_4", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron index f1f510d2dca..d47996c692e 100644 --- a/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron +++ b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_3", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-functions.ron b/naga/tests/out/hlsl/wgsl-functions.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-functions.ron +++ b/naga/tests/out/hlsl/wgsl-functions.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-globals.hlsl b/naga/tests/out/hlsl/wgsl-globals.hlsl index af2951a334a..8d7c3b6f363 100644 --- a/naga/tests/out/hlsl/wgsl-globals.hlsl +++ b/naga/tests/out/hlsl/wgsl-globals.hlsl @@ -110,9 +110,9 @@ uint NagaBufferLength(ByteAddressBuffer buffer) } [numthreads(1, 1, 1)] -void main(uint3 __local_invocation_id : SV_GroupThreadID) +void main(uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { wg = (float[10])0; at_1 = (uint)0; } diff --git a/naga/tests/out/hlsl/wgsl-globals.ron b/naga/tests/out/hlsl/wgsl-globals.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-globals.ron +++ b/naga/tests/out/hlsl/wgsl-globals.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-hlsl-keyword.ron b/naga/tests/out/hlsl/wgsl-hlsl-keyword.ron index eac1b945d2b..a0b1fa2b7ea 100644 --- a/naga/tests/out/hlsl/wgsl-hlsl-keyword.ron +++ b/naga/tests/out/hlsl/wgsl-hlsl-keyword.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-image.ron b/naga/tests/out/hlsl/wgsl-image.ron index f5ca4931d46..aa50ddedb0b 100644 --- a/naga/tests/out/hlsl/wgsl-image.ron +++ b/naga/tests/out/hlsl/wgsl-image.ron @@ -37,4 +37,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-int64.ron b/naga/tests/out/hlsl/wgsl-int64.ron index b973fe3da13..afc4b40f24d 100644 --- a/naga/tests/out/hlsl/wgsl-int64.ron +++ b/naga/tests/out/hlsl/wgsl-int64.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_0", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-interface.hlsl b/naga/tests/out/hlsl/wgsl-interface.hlsl index 6187ca09748..6311b54d18f 100644 --- a/naga/tests/out/hlsl/wgsl-interface.hlsl +++ b/naga/tests/out/hlsl/wgsl-interface.hlsl @@ -75,9 +75,9 @@ FragmentOutput fragment(FragmentInput_fragment fragmentinput_fragment) } [numthreads(1, 1, 1)] -void compute(uint3 global_id : SV_DispatchThreadID, uint3 local_id : SV_GroupThreadID, uint local_index : SV_GroupIndex, uint3 wg_id : SV_GroupID, uint3 num_wgs : SV_GroupID, uint3 __local_invocation_id : SV_GroupThreadID) +void compute(uint3 global_id : SV_DispatchThreadID, uint3 local_id : SV_GroupThreadID, uint local_index : SV_GroupIndex, uint3 wg_id : SV_GroupID, uint3 num_wgs : SV_GroupID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { output = (uint[1])0; } GroupMemoryBarrierWithGroupSync(); diff --git a/naga/tests/out/hlsl/wgsl-interface.ron b/naga/tests/out/hlsl/wgsl-interface.ron index 948962b9917..fa8774ba24c 100644 --- a/naga/tests/out/hlsl/wgsl-interface.ron +++ b/naga/tests/out/hlsl/wgsl-interface.ron @@ -21,4 +21,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-interpolate.ron b/naga/tests/out/hlsl/wgsl-interpolate.ron index d0046b04dd3..8c6fc532a04 100644 --- a/naga/tests/out/hlsl/wgsl-interpolate.ron +++ b/naga/tests/out/hlsl/wgsl-interpolate.ron @@ -13,4 +13,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-interpolate_compat.ron b/naga/tests/out/hlsl/wgsl-interpolate_compat.ron index d0046b04dd3..8c6fc532a04 100644 --- a/naga/tests/out/hlsl/wgsl-interpolate_compat.ron +++ b/naga/tests/out/hlsl/wgsl-interpolate_compat.ron @@ -13,4 +13,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-mat_cx2.ron b/naga/tests/out/hlsl/wgsl-mat_cx2.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-mat_cx2.ron +++ b/naga/tests/out/hlsl/wgsl-mat_cx2.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-mat_cx3.ron b/naga/tests/out/hlsl/wgsl-mat_cx3.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-mat_cx3.ron +++ b/naga/tests/out/hlsl/wgsl-mat_cx3.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-math-functions.ron b/naga/tests/out/hlsl/wgsl-math-functions.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/wgsl-math-functions.ron +++ b/naga/tests/out/hlsl/wgsl-math-functions.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-mesh-shader-empty.hlsl b/naga/tests/out/hlsl/wgsl-mesh-shader-empty.hlsl new file mode 100644 index 00000000000..16608ab0b47 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-mesh-shader-empty.hlsl @@ -0,0 +1,65 @@ +struct TaskPayload { + uint dummy; +}; + +struct VertexOutput { + float4 position : SV_Position; +}; + +struct PrimitiveOutput { + uint3 indices_; +}; + +struct MeshOutput { + VertexOutput vertices_[3]; + PrimitiveOutput primitives_[1]; + uint vertex_count; + uint primitive_count; +}; + +groupshared TaskPayload taskPayload; +groupshared MeshOutput mesh_output; + +struct MeshVertexOutput_ms_main { + float4 position : SV_Position; +}; + +struct MeshPrimitiveOutput_ms_main { +}; + +uint3 _ts_main() +{ + return uint3(1u, 1u, 1u); +} +[numthreads(64, 1, 1)] +void ts_main(uint __local_invocation_index : SV_GroupIndex) { + if (all(__local_invocation_index == 0)) { + taskPayload = (TaskPayload)0; + } + GroupMemoryBarrierWithGroupSync(); + uint3 gridSize = _ts_main(); + GroupMemoryBarrierWithGroupSync(); + DispatchMesh(gridSize.x, gridSize.y, gridSize.z, taskPayload); +} + +void _ms_main(in TaskPayload taskPayload) +{ + return; +} +[numthreads(64, 1, 1)] +[outputtopology("triangle")] +void ms_main(uint __local_invocation_index : SV_GroupIndex, out indices uint3 triangleIndices[1], out vertices MeshVertexOutput_ms_main vertices_[3], out primitives MeshPrimitiveOutput_ms_main primitives_[1], in payload TaskPayload taskPayload) { + if (all(__local_invocation_index == 0)) { + mesh_output = (MeshOutput)0; + } + GroupMemoryBarrierWithGroupSync(); + _ms_main(taskPayload); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(mesh_output.vertex_count, mesh_output.primitive_count); + for (int vertIndex = __local_invocation_index; vertIndex < mesh_output.vertex_count; vertIndex += 64) { + vertices_[vertIndex].position = mesh_output.vertices_[vertIndex].position; + } + for (int primIndex = __local_invocation_index; primIndex < mesh_output.primitive_count; primIndex += 64) { + triangleIndices[primIndex] = mesh_output.primitives_[primIndex].indices_; + } +} diff --git a/naga/tests/out/hlsl/wgsl-mesh-shader-empty.ron b/naga/tests/out/hlsl/wgsl-mesh-shader-empty.ron new file mode 100644 index 00000000000..8156a7a68d2 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-mesh-shader-empty.ron @@ -0,0 +1,20 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ], + task:[ + ( + entry_point:"ts_main", + target_profile:"as_6_5", + ), + ], + mesh:[ + ( + entry_point:"ms_main", + target_profile:"ms_6_5", + ), + ], +) diff --git a/naga/tests/out/hlsl/wgsl-mesh-shader-lines.hlsl b/naga/tests/out/hlsl/wgsl-mesh-shader-lines.hlsl new file mode 100644 index 00000000000..4a3140a04b0 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-mesh-shader-lines.hlsl @@ -0,0 +1,65 @@ +struct TaskPayload { + uint dummy; +}; + +struct VertexOutput { + float4 position : SV_Position; +}; + +struct PrimitiveOutput { + uint2 indices_; +}; + +struct MeshOutput { + VertexOutput vertices_[2]; + PrimitiveOutput primitives_[1]; + uint vertex_count; + uint primitive_count; +}; + +groupshared TaskPayload taskPayload; +groupshared MeshOutput mesh_output; + +struct MeshVertexOutput_ms_main { + float4 position : SV_Position; +}; + +struct MeshPrimitiveOutput_ms_main { +}; + +uint3 _ts_main() +{ + return uint3(1u, 1u, 1u); +} +[numthreads(64, 1, 1)] +void ts_main(uint __local_invocation_index : SV_GroupIndex) { + if (all(__local_invocation_index == 0)) { + taskPayload = (TaskPayload)0; + } + GroupMemoryBarrierWithGroupSync(); + uint3 gridSize = _ts_main(); + GroupMemoryBarrierWithGroupSync(); + DispatchMesh(gridSize.x, gridSize.y, gridSize.z, taskPayload); +} + +void _ms_main(in TaskPayload taskPayload) +{ + return; +} +[numthreads(64, 1, 1)] +[outputtopology("line")] +void ms_main(uint __local_invocation_index : SV_GroupIndex, out indices uint2 lineIndices[1], out vertices MeshVertexOutput_ms_main vertices_[2], out primitives MeshPrimitiveOutput_ms_main primitives_[1], in payload TaskPayload taskPayload) { + if (all(__local_invocation_index == 0)) { + mesh_output = (MeshOutput)0; + } + GroupMemoryBarrierWithGroupSync(); + _ms_main(taskPayload); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(mesh_output.vertex_count, mesh_output.primitive_count); + for (int vertIndex = __local_invocation_index; vertIndex < mesh_output.vertex_count; vertIndex += 64) { + vertices_[vertIndex].position = mesh_output.vertices_[vertIndex].position; + } + for (int primIndex = __local_invocation_index; primIndex < mesh_output.primitive_count; primIndex += 64) { + lineIndices[primIndex] = mesh_output.primitives_[primIndex].indices_; + } +} diff --git a/naga/tests/out/hlsl/wgsl-mesh-shader-lines.ron b/naga/tests/out/hlsl/wgsl-mesh-shader-lines.ron new file mode 100644 index 00000000000..8156a7a68d2 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-mesh-shader-lines.ron @@ -0,0 +1,20 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ], + task:[ + ( + entry_point:"ts_main", + target_profile:"as_6_5", + ), + ], + mesh:[ + ( + entry_point:"ms_main", + target_profile:"ms_6_5", + ), + ], +) diff --git a/naga/tests/out/hlsl/wgsl-mesh-shader.hlsl b/naga/tests/out/hlsl/wgsl-mesh-shader.hlsl new file mode 100644 index 00000000000..da652bd4ff2 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-mesh-shader.hlsl @@ -0,0 +1,269 @@ +struct TaskPayload { + float4 colorMask; + bool visible; + int _end_pad_0; + int _end_pad_1; + int _end_pad_2; +}; + +struct VertexOutput { + float4 position : SV_Position; + float4 color : LOC0; +}; + +struct PrimitiveOutput { + uint3 indices_; + bool cull : SV_CullPrimitive; + float4 colorMask : LOC1 : primitive; +}; + +struct PrimitiveInput { + float4 colorMask : LOC1 : primitive; +}; + +struct MeshOutput { + VertexOutput vertices_[3]; + PrimitiveOutput primitives_[1]; + uint vertex_count; + uint primitive_count; +}; + +groupshared TaskPayload taskPayload; +groupshared float workgroupData; +groupshared MeshOutput mesh_output; + +struct MeshVertexOutput_ms_main { + float4 color : LOC0; + float4 position : SV_Position; +}; + +struct MeshPrimitiveOutput_ms_main { + float4 colorMask : LOC1 : primitive; + bool cull : SV_CullPrimitive; +}; + +struct MeshVertexOutput_ms_no_ts { + float4 color_1 : LOC0; + float4 position_1 : SV_Position; +}; + +struct MeshPrimitiveOutput_ms_no_ts { + float4 colorMask_1 : LOC1 : primitive; + bool cull_1 : SV_CullPrimitive; +}; + +struct MeshVertexOutput_ms_divergent { + float4 color_2 : LOC0; + float4 position_2 : SV_Position; +}; + +struct MeshPrimitiveOutput_ms_divergent { + float4 colorMask_2 : LOC1 : primitive; + bool cull_2 : SV_CullPrimitive; +}; + +struct FragmentInput_fs_main { + float4 color_3 : LOC0; + float4 colorMask_3 : LOC1 : primitive; + float4 position_3 : SV_Position; +}; + +bool helper_reader(in TaskPayload taskPayload) +{ + bool _e2 = taskPayload.visible; + return _e2; +} + +void helper_writer(bool value) +{ + taskPayload.visible = value; + return; +} + +uint3 _ts_main() +{ + workgroupData = 1.0; + taskPayload.colorMask = float4(1.0, 1.0, 0.0, 1.0); + helper_writer(true); + const bool _e12 = helper_reader(taskPayload); + taskPayload.visible = _e12; + return uint3(1u, 1u, 1u); +} +[numthreads(1, 1, 1)] +void ts_main(uint __local_invocation_index : SV_GroupIndex) { + if (all(__local_invocation_index == 0)) { + taskPayload = (TaskPayload)0; + workgroupData = (float)0; + } + GroupMemoryBarrierWithGroupSync(); + uint3 gridSize = _ts_main(); + GroupMemoryBarrierWithGroupSync(); + if ( + gridSize.x > 256 || + gridSize.y > 256 || + gridSize.z > 256 || + ((uint64_t)gridSize.x) * ((uint64_t)gridSize.y) > 0xffffffffull || + ((uint64_t)gridSize.x) * ((uint64_t)gridSize.y) * ((uint64_t)gridSize.z) > 1024 + ) { + gridSize = uint3(0, 0, 0); + } + DispatchMesh(gridSize.x, gridSize.y, gridSize.z, taskPayload); +} + +uint3 _ts_divergent(uint3 thread_id : SV_GroupThreadID) +{ + if ((thread_id.x == 0u)) { + taskPayload.colorMask = float4(1.0, 1.0, 0.0, 1.0); + taskPayload.visible = true; + return uint3(1u, 1u, 1u); + } + return uint3(2u, 2u, 2u); +} +[numthreads(2, 1, 1)] +void ts_divergent(uint3 thread_id : SV_GroupThreadID, uint __local_invocation_index : SV_GroupIndex) { + if (all(__local_invocation_index == 0)) { + taskPayload = (TaskPayload)0; + } + GroupMemoryBarrierWithGroupSync(); + uint3 gridSize_1 = _ts_divergent(thread_id); + GroupMemoryBarrierWithGroupSync(); + if ( + gridSize_1.x > 256 || + gridSize_1.y > 256 || + gridSize_1.z > 256 || + ((uint64_t)gridSize_1.x) * ((uint64_t)gridSize_1.y) > 0xffffffffull || + ((uint64_t)gridSize_1.x) * ((uint64_t)gridSize_1.y) * ((uint64_t)gridSize_1.z) > 1024 + ) { + gridSize_1 = uint3(0, 0, 0); + } + DispatchMesh(gridSize_1.x, gridSize_1.y, gridSize_1.z, taskPayload); +} + +void _ms_main(in TaskPayload taskPayload) +{ + mesh_output.vertex_count = 3u; + mesh_output.primitive_count = 1u; + workgroupData = 2.0; + mesh_output.vertices_[0].position = float4(0.0, 1.0, 0.0, 1.0); + float4 _e23 = taskPayload.colorMask; + mesh_output.vertices_[0].color = (float4(0.0, 1.0, 0.0, 1.0) * _e23); + mesh_output.vertices_[1].position = float4(-1.0, -1.0, 0.0, 1.0); + float4 _e45 = taskPayload.colorMask; + mesh_output.vertices_[1].color = (float4(0.0, 0.0, 1.0, 1.0) * _e45); + mesh_output.vertices_[2].position = float4(1.0, -1.0, 0.0, 1.0); + float4 _e67 = taskPayload.colorMask; + mesh_output.vertices_[2].color = (float4(1.0, 0.0, 0.0, 1.0) * _e67); + mesh_output.primitives_[0].indices_ = uint3(0u, 1u, 2u); + const bool _e86 = helper_reader(taskPayload); + mesh_output.primitives_[0].cull = !(_e86); + mesh_output.primitives_[0].colorMask = float4(1.0, 0.0, 1.0, 1.0); + return; +} +[numthreads(1, 1, 1)] +[outputtopology("triangle")] +void ms_main(uint __local_invocation_index : SV_GroupIndex, out indices uint3 triangleIndices[1], out vertices MeshVertexOutput_ms_main vertices_[3], out primitives MeshPrimitiveOutput_ms_main primitives_[1], in payload TaskPayload taskPayload) { + if (all(__local_invocation_index == 0)) { + workgroupData = (float)0; + mesh_output = (MeshOutput)0; + } + GroupMemoryBarrierWithGroupSync(); + _ms_main(taskPayload); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(mesh_output.vertex_count, mesh_output.primitive_count); + for (int vertIndex = __local_invocation_index; vertIndex < mesh_output.vertex_count; vertIndex += 1) { + vertices_[vertIndex].color = mesh_output.vertices_[vertIndex].color; + vertices_[vertIndex].position = mesh_output.vertices_[vertIndex].position; + } + for (int primIndex = __local_invocation_index; primIndex < mesh_output.primitive_count; primIndex += 1) { + primitives_[primIndex].colorMask = mesh_output.primitives_[primIndex].colorMask; + primitives_[primIndex].cull = mesh_output.primitives_[primIndex].cull; + triangleIndices[primIndex] = mesh_output.primitives_[primIndex].indices_; + } +} + +void _ms_no_ts() +{ + mesh_output.vertex_count = 3u; + mesh_output.primitive_count = 1u; + workgroupData = 2.0; + mesh_output.vertices_[0].position = float4(0.0, 1.0, 0.0, 1.0); + mesh_output.vertices_[0].color = float4(0.0, 1.0, 0.0, 1.0); + mesh_output.vertices_[1].position = float4(-1.0, -1.0, 0.0, 1.0); + mesh_output.vertices_[1].color = float4(0.0, 0.0, 1.0, 1.0); + mesh_output.vertices_[2].position = float4(1.0, -1.0, 0.0, 1.0); + mesh_output.vertices_[2].color = float4(1.0, 0.0, 0.0, 1.0); + mesh_output.primitives_[0].indices_ = uint3(0u, 1u, 2u); + mesh_output.primitives_[0].cull = false; + mesh_output.primitives_[0].colorMask = float4(1.0, 0.0, 1.0, 1.0); + return; +} +[numthreads(1, 1, 1)] +[outputtopology("triangle")] +void ms_no_ts(uint __local_invocation_index : SV_GroupIndex, out indices uint3 triangleIndices_1[1], out vertices MeshVertexOutput_ms_no_ts vertices_1[3], out primitives MeshPrimitiveOutput_ms_no_ts primitives_1[1]) { + if (all(__local_invocation_index == 0)) { + workgroupData = (float)0; + mesh_output = (MeshOutput)0; + } + GroupMemoryBarrierWithGroupSync(); + _ms_no_ts(); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(mesh_output.vertex_count, mesh_output.primitive_count); + for (int vertIndex_1 = __local_invocation_index; vertIndex_1 < mesh_output.vertex_count; vertIndex_1 += 1) { + vertices_1[vertIndex_1].color_1 = mesh_output.vertices_[vertIndex_1].color; + vertices_1[vertIndex_1].position_1 = mesh_output.vertices_[vertIndex_1].position; + } + for (int primIndex_1 = __local_invocation_index; primIndex_1 < mesh_output.primitive_count; primIndex_1 += 1) { + primitives_1[primIndex_1].colorMask_1 = mesh_output.primitives_[primIndex_1].colorMask; + primitives_1[primIndex_1].cull_1 = mesh_output.primitives_[primIndex_1].cull; + triangleIndices_1[primIndex_1] = mesh_output.primitives_[primIndex_1].indices_; + } +} + +void _ms_divergent(uint3 thread_id_1 : SV_GroupThreadID) +{ + if ((thread_id_1.x == 0u)) { + mesh_output.vertex_count = 3u; + mesh_output.primitive_count = 1u; + workgroupData = 2.0; + mesh_output.vertices_[0].position = float4(0.0, 1.0, 0.0, 1.0); + mesh_output.vertices_[0].color = float4(0.0, 1.0, 0.0, 1.0); + mesh_output.vertices_[1].position = float4(-1.0, -1.0, 0.0, 1.0); + mesh_output.vertices_[1].color = float4(0.0, 0.0, 1.0, 1.0); + mesh_output.vertices_[2].position = float4(1.0, -1.0, 0.0, 1.0); + mesh_output.vertices_[2].color = float4(1.0, 0.0, 0.0, 1.0); + mesh_output.primitives_[0].indices_ = uint3(0u, 1u, 2u); + mesh_output.primitives_[0].cull = false; + mesh_output.primitives_[0].colorMask = float4(1.0, 0.0, 1.0, 1.0); + return; + } else { + return; + } +} +[numthreads(2, 1, 1)] +[outputtopology("triangle")] +void ms_divergent(uint3 thread_id_1 : SV_GroupThreadID, uint __local_invocation_index : SV_GroupIndex, out indices uint3 triangleIndices_2[1], out vertices MeshVertexOutput_ms_divergent vertices_2[3], out primitives MeshPrimitiveOutput_ms_divergent primitives_2[1]) { + if (all(__local_invocation_index == 0)) { + workgroupData = (float)0; + mesh_output = (MeshOutput)0; + } + GroupMemoryBarrierWithGroupSync(); + _ms_divergent(thread_id_1); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(mesh_output.vertex_count, mesh_output.primitive_count); + for (int vertIndex_2 = __local_invocation_index; vertIndex_2 < mesh_output.vertex_count; vertIndex_2 += 2) { + vertices_2[vertIndex_2].color_2 = mesh_output.vertices_[vertIndex_2].color; + vertices_2[vertIndex_2].position_2 = mesh_output.vertices_[vertIndex_2].position; + } + for (int primIndex_2 = __local_invocation_index; primIndex_2 < mesh_output.primitive_count; primIndex_2 += 2) { + primitives_2[primIndex_2].colorMask_2 = mesh_output.primitives_[primIndex_2].colorMask; + primitives_2[primIndex_2].cull_2 = mesh_output.primitives_[primIndex_2].cull; + triangleIndices_2[primIndex_2] = mesh_output.primitives_[primIndex_2].indices_; + } +} + +float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 +{ + VertexOutput vertex = { fragmentinput_fs_main.position_3, fragmentinput_fs_main.color_3 }; + PrimitiveInput primitive = { fragmentinput_fs_main.colorMask_3 }; + return (vertex.color * primitive.colorMask); +} diff --git a/naga/tests/out/hlsl/wgsl-mesh-shader.ron b/naga/tests/out/hlsl/wgsl-mesh-shader.ron new file mode 100644 index 00000000000..0f1c159ff3b --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-mesh-shader.ron @@ -0,0 +1,36 @@ +( + vertex:[ + ], + fragment:[ + ( + entry_point:"fs_main", + target_profile:"ps_6_5", + ), + ], + compute:[ + ], + task:[ + ( + entry_point:"ts_main", + target_profile:"as_6_5", + ), + ( + entry_point:"ts_divergent", + target_profile:"as_6_5", + ), + ], + mesh:[ + ( + entry_point:"ms_main", + target_profile:"ms_6_5", + ), + ( + entry_point:"ms_no_ts", + target_profile:"ms_6_5", + ), + ( + entry_point:"ms_divergent", + target_profile:"ms_6_5", + ), + ], +) diff --git a/naga/tests/out/hlsl/wgsl-multiview.ron b/naga/tests/out/hlsl/wgsl-multiview.ron index ff856788943..ea2a04cd8ff 100644 --- a/naga/tests/out/hlsl/wgsl-multiview.ron +++ b/naga/tests/out/hlsl/wgsl-multiview.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-operators.ron b/naga/tests/out/hlsl/wgsl-operators.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-operators.ron +++ b/naga/tests/out/hlsl/wgsl-operators.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-overrides.ron b/naga/tests/out/hlsl/wgsl-overrides.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-overrides.ron +++ b/naga/tests/out/hlsl/wgsl-overrides.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-padding.ron b/naga/tests/out/hlsl/wgsl-padding.ron index 46dfdd83e30..7a49d875af0 100644 --- a/naga/tests/out/hlsl/wgsl-padding.ron +++ b/naga/tests/out/hlsl/wgsl-padding.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-phony_assignment.ron b/naga/tests/out/hlsl/wgsl-phony_assignment.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-phony_assignment.ron +++ b/naga/tests/out/hlsl/wgsl-phony_assignment.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-pointer-function-arg.ron b/naga/tests/out/hlsl/wgsl-pointer-function-arg.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-pointer-function-arg.ron +++ b/naga/tests/out/hlsl/wgsl-pointer-function-arg.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-push-constants.ron b/naga/tests/out/hlsl/wgsl-push-constants.ron index e444486559d..73c10d5cd5e 100644 --- a/naga/tests/out/hlsl/wgsl-push-constants.ron +++ b/naga/tests/out/hlsl/wgsl-push-constants.ron @@ -13,4 +13,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-quad.ron b/naga/tests/out/hlsl/wgsl-quad.ron index de905523563..841c148dcbf 100644 --- a/naga/tests/out/hlsl/wgsl-quad.ron +++ b/naga/tests/out/hlsl/wgsl-quad.ron @@ -17,4 +17,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-ray-query-no-init-tracking.ron b/naga/tests/out/hlsl/wgsl-ray-query-no-init-tracking.ron index a31e1db125a..63c446e0605 100644 --- a/naga/tests/out/hlsl/wgsl-ray-query-no-init-tracking.ron +++ b/naga/tests/out/hlsl/wgsl-ray-query-no-init-tracking.ron @@ -13,4 +13,8 @@ target_profile:"cs_6_5", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-ray-query.ron b/naga/tests/out/hlsl/wgsl-ray-query.ron index a31e1db125a..63c446e0605 100644 --- a/naga/tests/out/hlsl/wgsl-ray-query.ron +++ b/naga/tests/out/hlsl/wgsl-ray-query.ron @@ -13,4 +13,8 @@ target_profile:"cs_6_5", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-select.ron b/naga/tests/out/hlsl/wgsl-select.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-select.ron +++ b/naga/tests/out/hlsl/wgsl-select.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-shadow.ron b/naga/tests/out/hlsl/wgsl-shadow.ron index 69be5b25e01..46e7fdbc4fe 100644 --- a/naga/tests/out/hlsl/wgsl-shadow.ron +++ b/naga/tests/out/hlsl/wgsl-shadow.ron @@ -17,4 +17,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-skybox.ron b/naga/tests/out/hlsl/wgsl-skybox.ron index 27b0c4af4da..11057f64012 100644 --- a/naga/tests/out/hlsl/wgsl-skybox.ron +++ b/naga/tests/out/hlsl/wgsl-skybox.ron @@ -13,4 +13,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-standard.ron b/naga/tests/out/hlsl/wgsl-standard.ron index 82373299d87..0ac408099a1 100644 --- a/naga/tests/out/hlsl/wgsl-standard.ron +++ b/naga/tests/out/hlsl/wgsl-standard.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-storage-textures.ron b/naga/tests/out/hlsl/wgsl-storage-textures.ron index b07b4fb7aad..b6ea267f85c 100644 --- a/naga/tests/out/hlsl/wgsl-storage-textures.ron +++ b/naga/tests/out/hlsl/wgsl-storage-textures.ron @@ -13,4 +13,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-struct-layout.ron b/naga/tests/out/hlsl/wgsl-struct-layout.ron index 04fe25e38a1..67ae82f4928 100644 --- a/naga/tests/out/hlsl/wgsl-struct-layout.ron +++ b/naga/tests/out/hlsl/wgsl-struct-layout.ron @@ -29,4 +29,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-subgroup-operations.ron b/naga/tests/out/hlsl/wgsl-subgroup-operations.ron index b973fe3da13..afc4b40f24d 100644 --- a/naga/tests/out/hlsl/wgsl-subgroup-operations.ron +++ b/naga/tests/out/hlsl/wgsl-subgroup-operations.ron @@ -9,4 +9,8 @@ target_profile:"cs_6_0", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-texture-arg.ron b/naga/tests/out/hlsl/wgsl-texture-arg.ron index 341a4c528e3..403f6c3d24f 100644 --- a/naga/tests/out/hlsl/wgsl-texture-arg.ron +++ b/naga/tests/out/hlsl/wgsl-texture-arg.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-texture-external.ron b/naga/tests/out/hlsl/wgsl-texture-external.ron index 23afa21e1f5..261c82f7d28 100644 --- a/naga/tests/out/hlsl/wgsl-texture-external.ron +++ b/naga/tests/out/hlsl/wgsl-texture-external.ron @@ -17,4 +17,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-type-inference.ron b/naga/tests/out/hlsl/wgsl-type-inference.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-type-inference.ron +++ b/naga/tests/out/hlsl/wgsl-type-inference.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_frag.ron b/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_frag.ron index eac1b945d2b..a0b1fa2b7ea 100644 --- a/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_frag.ron +++ b/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_frag.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_vert.ron b/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_vert.ron index a24f8d0eb8b..5a5236b3fda 100644 --- a/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_vert.ron +++ b/naga/tests/out/hlsl/wgsl-unconsumed_vertex_outputs_vert.ron @@ -9,4 +9,8 @@ ], compute:[ ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.hlsl b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.hlsl index cbbdb7feb50..4237c6abc43 100644 --- a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.hlsl +++ b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.hlsl @@ -8,9 +8,9 @@ groupshared int wg_signed; groupshared AtomicStruct wg_struct; [numthreads(64, 1, 1)] -void test_atomic_workgroup_uniform_load(uint3 workgroup_id : SV_GroupID, uint3 local_id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID) +void test_atomic_workgroup_uniform_load(uint3 workgroup_id : SV_GroupID, uint3 local_id : SV_GroupThreadID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { wg_scalar = (uint)0; wg_signed = (int)0; wg_struct = (AtomicStruct)0; diff --git a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.ron b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.ron index 61f1e01fa64..6557be880e9 100644 --- a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.ron +++ b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.hlsl b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.hlsl index 0bfb55cb6cd..1a3d7228a0c 100644 --- a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.hlsl +++ b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.hlsl @@ -3,9 +3,9 @@ static const uint SIZE = 128u; groupshared int arr_i32_[128]; [numthreads(4, 1, 1)] -void test_workgroupUniformLoad(uint3 workgroup_id : SV_GroupID, uint3 __local_invocation_id : SV_GroupThreadID) +void test_workgroupUniformLoad(uint3 workgroup_id : SV_GroupID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { arr_i32_ = (int[128])0; } GroupMemoryBarrierWithGroupSync(); diff --git a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.ron b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.ron index 17e926cdebd..e328c18589a 100644 --- a/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.ron +++ b/naga/tests/out/hlsl/wgsl-workgroup-uniform-load.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/hlsl/wgsl-workgroup-var-init.hlsl b/naga/tests/out/hlsl/wgsl-workgroup-var-init.hlsl index 49b4fe621a5..8127f58449a 100644 --- a/naga/tests/out/hlsl/wgsl-workgroup-var-init.hlsl +++ b/naga/tests/out/hlsl/wgsl-workgroup-var-init.hlsl @@ -8,9 +8,9 @@ groupshared WStruct w_mem; RWByteAddressBuffer output : register(u0); [numthreads(1, 1, 1)] -void main(uint3 __local_invocation_id : SV_GroupThreadID) +void main(uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + if (all(__local_invocation_index == 0)) { w_mem = (WStruct)0; } GroupMemoryBarrierWithGroupSync(); diff --git a/naga/tests/out/hlsl/wgsl-workgroup-var-init.ron b/naga/tests/out/hlsl/wgsl-workgroup-var-init.ron index a07b03300b1..f1edcbbbdd1 100644 --- a/naga/tests/out/hlsl/wgsl-workgroup-var-init.ron +++ b/naga/tests/out/hlsl/wgsl-workgroup-var-init.ron @@ -9,4 +9,8 @@ target_profile:"cs_5_1", ), ], + task:[ + ], + mesh:[ + ], ) diff --git a/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm b/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm index 4ce80049d24..c64e2c1b762 100644 --- a/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm +++ b/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm @@ -1,14 +1,14 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 131 +; Bound: 127 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %112 "main" %116 +OpEntryPoint GLCompute %112 "main" %115 OpExecutionMode %112 LocalSize 1 1 1 OpDecorate %6 ArrayStride 4 -OpDecorate %116 BuiltIn LocalInvocationId +OpDecorate %115 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeFloat 32 %4 = OpTypeInt 32 1 @@ -56,13 +56,10 @@ OpDecorate %116 BuiltIn LocalInvocationId %102 = OpConstant %3 7 %108 = OpTypePointer Workgroup %5 %114 = OpConstantNull %6 -%115 = OpTypeVector %5 3 -%117 = OpTypePointer Input %115 -%116 = OpVariable %117 Input -%119 = OpConstantNull %115 -%121 = OpTypeBool -%120 = OpTypeVector %121 3 -%126 = OpConstant %5 264 +%116 = OpTypePointer Input %5 +%115 = OpVariable %116 Input +%119 = OpTypeBool +%122 = OpConstant %5 264 %20 = OpFunction %2 None %21 %19 = OpLabel %63 = OpVariable %36 Function %64 @@ -152,20 +149,19 @@ OpFunctionEnd %111 = OpLabel OpBranch %113 %113 = OpLabel -%118 = OpLoad %115 %116 -%122 = OpIEqual %120 %118 %119 -%123 = OpAll %121 %122 -OpSelectionMerge %124 None -OpBranchConditional %123 %125 %124 -%125 = OpLabel +%117 = OpLoad %5 %115 +%118 = OpIEqual %119 %117 %11 +OpSelectionMerge %120 None +OpBranchConditional %118 %121 %120 +%121 = OpLabel OpStore %17 %114 -OpBranch %124 -%124 = OpLabel -OpControlBarrier %30 %30 %126 -OpBranch %127 -%127 = OpLabel -%128 = OpFunctionCall %2 %20 -%129 = OpFunctionCall %2 %100 -%130 = OpFunctionCall %2 %105 +OpBranch %120 +%120 = OpLabel +OpControlBarrier %30 %30 %122 +OpBranch %123 +%123 = OpLabel +%124 = OpFunctionCall %2 %20 +%125 = OpFunctionCall %2 %100 +%126 = OpFunctionCall %2 %105 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm b/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm index d3c2b26be9a..c544556caa8 100644 --- a/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm +++ b/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm @@ -8,7 +8,7 @@ OpCapability Int64 OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %32 "cs_main" %29 +OpEntryPoint GLCompute %32 "cs_main" %29 %49 OpExecutionMode %32 LocalSize 2 1 1 OpDecorate %5 ArrayStride 8 OpMemberDecorate %8 0 Offset 0 @@ -30,6 +30,7 @@ OpDecorate %19 Binding 2 OpDecorate %20 Block OpMemberDecorate %20 0 Offset 0 OpDecorate %29 BuiltIn LocalInvocationId +OpDecorate %49 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 64 0 %4 = OpTypeInt 64 1 @@ -70,8 +71,8 @@ OpDecorate %29 BuiltIn LocalInvocationId %46 = OpConstantNull %3 %47 = OpConstantNull %5 %48 = OpConstantNull %8 -%49 = OpConstantNull %9 -%50 = OpTypeVector %10 3 +%50 = OpTypePointer Input %7 +%49 = OpVariable %50 Input %55 = OpConstant %7 264 %58 = OpTypeInt 32 1 %57 = OpConstant %58 1 @@ -87,8 +88,8 @@ OpDecorate %29 BuiltIn LocalInvocationId %40 = OpAccessChain %39 %19 %35 OpBranch %45 %45 = OpLabel -%51 = OpIEqual %50 %31 %49 -%52 = OpAll %10 %51 +%51 = OpLoad %7 %49 +%52 = OpIEqual %10 %51 %35 OpSelectionMerge %53 None OpBranchConditional %52 %54 %53 %54 = OpLabel diff --git a/naga/tests/out/spv/wgsl-atomicOps.spvasm b/naga/tests/out/spv/wgsl-atomicOps.spvasm index 839200d7694..70f07928d56 100644 --- a/naga/tests/out/spv/wgsl-atomicOps.spvasm +++ b/naga/tests/out/spv/wgsl-atomicOps.spvasm @@ -6,7 +6,7 @@ OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %31 "cs_main" %28 +OpEntryPoint GLCompute %31 "cs_main" %28 %47 OpExecutionMode %31 LocalSize 2 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %7 0 Offset 0 @@ -28,6 +28,7 @@ OpDecorate %18 Binding 2 OpDecorate %19 Block OpMemberDecorate %19 0 Offset 0 OpDecorate %28 BuiltIn LocalInvocationId +OpDecorate %47 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -66,8 +67,8 @@ OpDecorate %28 BuiltIn LocalInvocationId %44 = OpConstantNull %3 %45 = OpConstantNull %5 %46 = OpConstantNull %7 -%47 = OpConstantNull %8 -%48 = OpTypeVector %9 3 +%48 = OpTypePointer Input %3 +%47 = OpVariable %48 Input %53 = OpConstant %3 264 %55 = OpTypePointer StorageBuffer %4 %59 = OpTypePointer Workgroup %4 @@ -79,8 +80,8 @@ OpDecorate %28 BuiltIn LocalInvocationId %39 = OpAccessChain %38 %18 %34 OpBranch %43 %43 = OpLabel -%49 = OpIEqual %48 %30 %47 -%50 = OpAll %9 %49 +%49 = OpLoad %3 %47 +%50 = OpIEqual %9 %49 %34 OpSelectionMerge %51 None OpBranchConditional %50 %52 %51 %52 = OpLabel diff --git a/naga/tests/out/spv/wgsl-globals.spvasm b/naga/tests/out/spv/wgsl-globals.spvasm index 2b68ed5a17e..7253b9aab09 100644 --- a/naga/tests/out/spv/wgsl-globals.spvasm +++ b/naga/tests/out/spv/wgsl-globals.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 197 +; Bound: 193 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %114 "main" %139 +OpEntryPoint GLCompute %114 "main" %138 OpExecutionMode %114 LocalSize 1 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %9 0 Offset 0 @@ -57,7 +57,7 @@ OpDecorate %52 DescriptorSet 0 OpDecorate %52 Binding 7 OpDecorate %53 Block OpMemberDecorate %53 0 Offset 0 -OpDecorate %139 BuiltIn LocalInvocationId +OpDecorate %138 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeBool %4 = OpTypeFloat 32 @@ -139,28 +139,25 @@ OpDecorate %139 BuiltIn LocalInvocationId %134 = OpTypePointer Function %3 %136 = OpConstantNull %5 %137 = OpConstantNull %7 -%138 = OpTypeVector %7 3 -%140 = OpTypePointer Input %138 -%139 = OpVariable %140 Input -%142 = OpConstantNull %138 -%143 = OpTypeVector %3 3 -%148 = OpConstant %7 264 -%151 = OpTypePointer Workgroup %4 -%152 = OpTypePointer Uniform %21 -%153 = OpTypePointer Uniform %20 -%154 = OpTypePointer Uniform %26 -%158 = OpTypePointer Uniform %17 -%159 = OpTypePointer Uniform %16 -%160 = OpTypePointer Uniform %12 -%165 = OpConstant %7 7 -%172 = OpConstant %7 6 -%174 = OpTypePointer StorageBuffer %10 -%175 = OpConstant %7 1 -%178 = OpConstant %7 5 -%180 = OpTypePointer Uniform %4 -%181 = OpConstant %7 3 -%184 = OpConstant %7 4 -%196 = OpConstant %23 2 +%139 = OpTypePointer Input %7 +%138 = OpVariable %139 Input +%144 = OpConstant %7 264 +%147 = OpTypePointer Workgroup %4 +%148 = OpTypePointer Uniform %21 +%149 = OpTypePointer Uniform %20 +%150 = OpTypePointer Uniform %26 +%154 = OpTypePointer Uniform %17 +%155 = OpTypePointer Uniform %16 +%156 = OpTypePointer Uniform %12 +%161 = OpConstant %7 7 +%168 = OpConstant %7 6 +%170 = OpTypePointer StorageBuffer %10 +%171 = OpConstant %7 1 +%174 = OpConstant %7 5 +%176 = OpTypePointer Uniform %4 +%177 = OpConstant %7 3 +%180 = OpConstant %7 4 +%192 = OpConstant %23 2 %57 = OpFunction %2 None %58 %56 = OpFunctionParameter %8 %55 = OpLabel @@ -231,58 +228,57 @@ OpFunctionEnd %127 = OpAccessChain %126 %52 %64 OpBranch %135 %135 = OpLabel -%141 = OpLoad %138 %139 -%144 = OpIEqual %143 %141 %142 -%145 = OpAll %3 %144 -OpSelectionMerge %146 None -OpBranchConditional %145 %147 %146 -%147 = OpLabel +%140 = OpLoad %7 %138 +%141 = OpIEqual %3 %140 %64 +OpSelectionMerge %142 None +OpBranchConditional %141 %143 %142 +%143 = OpLabel OpStore %30 %136 OpStore %32 %137 -OpBranch %146 -%146 = OpLabel -OpControlBarrier %18 %18 %148 -OpBranch %149 -%149 = OpLabel -%150 = OpFunctionCall %2 %61 -%155 = OpAccessChain %154 %127 %64 %64 -%156 = OpLoad %26 %155 -%157 = OpFunctionCall %20 %96 %156 -%161 = OpAccessChain %160 %125 %64 %64 %64 -%162 = OpLoad %12 %161 -%163 = OpMatrixTimesVector %10 %157 %162 -%164 = OpCompositeExtract %4 %163 0 -%166 = OpAccessChain %151 %30 %165 -OpStore %166 %164 -%167 = OpLoad %25 %123 -%168 = OpFunctionCall %15 %105 %167 -%169 = OpLoad %8 %121 -%170 = OpMatrixTimesVector %10 %168 %169 -%171 = OpCompositeExtract %4 %170 0 -%173 = OpAccessChain %151 %30 %172 -OpStore %173 %171 -%176 = OpAccessChain %77 %117 %175 %175 -%177 = OpLoad %4 %176 -%179 = OpAccessChain %151 %30 %178 -OpStore %179 %177 -%182 = OpAccessChain %180 %119 %64 %181 +OpBranch %142 +%142 = OpLabel +OpControlBarrier %18 %18 %144 +OpBranch %145 +%145 = OpLabel +%146 = OpFunctionCall %2 %61 +%151 = OpAccessChain %150 %127 %64 %64 +%152 = OpLoad %26 %151 +%153 = OpFunctionCall %20 %96 %152 +%157 = OpAccessChain %156 %125 %64 %64 %64 +%158 = OpLoad %12 %157 +%159 = OpMatrixTimesVector %10 %153 %158 +%160 = OpCompositeExtract %4 %159 0 +%162 = OpAccessChain %147 %30 %161 +OpStore %162 %160 +%163 = OpLoad %25 %123 +%164 = OpFunctionCall %15 %105 %163 +%165 = OpLoad %8 %121 +%166 = OpMatrixTimesVector %10 %164 %165 +%167 = OpCompositeExtract %4 %166 0 +%169 = OpAccessChain %147 %30 %168 +OpStore %169 %167 +%172 = OpAccessChain %77 %117 %171 %171 +%173 = OpLoad %4 %172 +%175 = OpAccessChain %147 %30 %174 +OpStore %175 %173 +%178 = OpAccessChain %176 %119 %64 %177 +%179 = OpLoad %4 %178 +%181 = OpAccessChain %147 %30 %180 +OpStore %181 %179 +%182 = OpAccessChain %77 %115 %171 %183 = OpLoad %4 %182 -%185 = OpAccessChain %151 %30 %184 -OpStore %185 %183 -%186 = OpAccessChain %77 %115 %175 -%187 = OpLoad %4 %186 -%188 = OpAccessChain %151 %30 %181 -OpStore %188 %187 -%189 = OpAccessChain %77 %115 %64 %64 -%190 = OpLoad %4 %189 -%191 = OpAccessChain %151 %30 %18 +%184 = OpAccessChain %147 %30 %177 +OpStore %184 %183 +%185 = OpAccessChain %77 %115 %64 %64 +%186 = OpLoad %4 %185 +%187 = OpAccessChain %147 %30 %18 +OpStore %187 %186 +%188 = OpAccessChain %77 %115 %171 +OpStore %188 %130 +%189 = OpArrayLength %7 %37 0 +%190 = OpConvertUToF %4 %189 +%191 = OpAccessChain %147 %30 %171 OpStore %191 %190 -%192 = OpAccessChain %77 %115 %175 -OpStore %192 %130 -%193 = OpArrayLength %7 %37 0 -%194 = OpConvertUToF %4 %193 -%195 = OpAccessChain %151 %30 %175 -OpStore %195 %194 -OpAtomicStore %32 %196 %64 %18 +OpAtomicStore %32 %192 %64 %18 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-interface.compute.spvasm b/naga/tests/out/spv/wgsl-interface.compute.spvasm index 912d28d5b08..c3fb080b732 100644 --- a/naga/tests/out/spv/wgsl-interface.compute.spvasm +++ b/naga/tests/out/spv/wgsl-interface.compute.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 53 +; Bound: 50 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -43,12 +43,10 @@ OpDecorate %27 BuiltIn NumWorkgroups %27 = OpVariable %18 Input %30 = OpTypeFunction %2 %32 = OpConstantNull %9 -%33 = OpConstantNull %11 -%34 = OpTypeVector %8 3 -%39 = OpConstant %6 2 -%40 = OpConstant %6 264 -%42 = OpTypePointer Workgroup %6 -%51 = OpConstant %6 0 +%33 = OpConstant %6 0 +%37 = OpConstant %6 2 +%38 = OpConstant %6 264 +%40 = OpTypePointer Workgroup %6 %29 = OpFunction %2 None %30 %16 = OpLabel %19 = OpLoad %11 %17 @@ -58,26 +56,25 @@ OpDecorate %27 BuiltIn NumWorkgroups %28 = OpLoad %11 %27 OpBranch %31 %31 = OpLabel -%35 = OpIEqual %34 %21 %33 -%36 = OpAll %8 %35 -OpSelectionMerge %37 None -OpBranchConditional %36 %38 %37 -%38 = OpLabel +%34 = OpIEqual %8 %24 %33 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %35 +%36 = OpLabel OpStore %14 %32 -OpBranch %37 -%37 = OpLabel -OpControlBarrier %39 %39 %40 -OpBranch %41 -%41 = OpLabel -%43 = OpCompositeExtract %6 %19 0 -%44 = OpCompositeExtract %6 %21 0 -%45 = OpIAdd %6 %43 %44 -%46 = OpIAdd %6 %45 %24 -%47 = OpCompositeExtract %6 %26 0 +OpBranch %35 +%35 = OpLabel +OpControlBarrier %37 %37 %38 +OpBranch %39 +%39 = OpLabel +%41 = OpCompositeExtract %6 %19 0 +%42 = OpCompositeExtract %6 %21 0 +%43 = OpIAdd %6 %41 %42 +%44 = OpIAdd %6 %43 %24 +%45 = OpCompositeExtract %6 %26 0 +%46 = OpIAdd %6 %44 %45 +%47 = OpCompositeExtract %6 %28 0 %48 = OpIAdd %6 %46 %47 -%49 = OpCompositeExtract %6 %28 0 -%50 = OpIAdd %6 %48 %49 -%52 = OpAccessChain %42 %14 %51 -OpStore %52 %50 +%49 = OpAccessChain %40 %14 %33 +OpStore %49 %48 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm index 37834132871..d9f4f965053 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 100 +; Bound: 130 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %20 "ts_main" %15 -OpEntryPoint MeshEXT %44 "ms_main" %15 %30 %40 %43 %17 %47 -OpExecutionMode %20 LocalSize 1 1 1 -OpExecutionMode %44 LocalSize 1 1 1 -OpExecutionMode %44 OutputTrianglesEXT -OpExecutionMode %44 OutputVertices 3 -OpExecutionMode %44 OutputPrimitivesEXT 1 -OpDecorate %30 BuiltIn LocalInvocationIndex -OpMemberDecorate %37 0 BuiltIn Position -OpDecorate %37 Block -OpDecorate %43 BuiltIn PrimitiveTriangleIndicesEXT +OpEntryPoint TaskEXT %36 "ts_main" %15 %25 +OpEntryPoint MeshEXT %82 "ms_main" %15 %64 %69 %72 %17 %76 +OpExecutionMode %36 LocalSize 64 1 1 +OpExecutionMode %82 LocalSize 64 1 1 +OpExecutionMode %82 OutputTrianglesEXT +OpExecutionMode %82 OutputVertices 3 +OpExecutionMode %82 OutputPrimitivesEXT 1 +OpDecorate %64 BuiltIn LocalInvocationIndex +OpMemberDecorate %66 0 BuiltIn Position +OpDecorate %66 Block +OpDecorate %72 BuiltIn PrimitiveTriangleIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %9 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %14 0 Offset 0 OpMemberDecorate %14 1 Offset 48 OpMemberDecorate %14 2 Offset 64 OpMemberDecorate %14 3 Offset 68 -OpDecorate %47 BuiltIn LocalInvocationId +OpDecorate %25 BuiltIn LocalInvocationIndex +OpDecorate %76 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -45,119 +47,157 @@ OpDecorate %47 BuiltIn LocalInvocationId %15 = OpVariable %16 TaskPayloadWorkgroupEXT %18 = OpTypePointer Workgroup %14 %17 = OpVariable %18 Workgroup -%21 = OpTypeFunction %2 +%21 = OpTypeFunction %8 %22 = OpConstantComposite %8 %13 %13 %13 -%24 = OpConstant %3 2 -%25 = OpConstant %3 264 -%31 = OpTypePointer Input %3 -%30 = OpVariable %31 Input -%35 = OpTypePointer Function %3 -%37 = OpTypeStruct %5 -%38 = OpTypeArray %37 %11 -%39 = OpTypePointer Output %38 -%40 = OpVariable %39 Output -%41 = OpTypeArray %8 %13 -%42 = OpTypePointer Output %41 -%43 = OpVariable %42 Output -%46 = OpConstantNull %14 -%48 = OpTypePointer Input %8 -%47 = OpVariable %48 Input -%50 = OpConstantNull %8 -%52 = OpTypeBool -%51 = OpTypeVector %52 3 -%59 = OpTypePointer Workgroup %3 -%66 = OpTypePointer Workgroup %10 -%67 = OpConstant %3 0 -%69 = OpTypePointer Workgroup %12 -%76 = OpTypePointer Workgroup %5 -%79 = OpTypePointer Output %5 -%89 = OpTypePointer Workgroup %8 -%92 = OpTypePointer Output %8 -%20 = OpFunction %2 None %21 +%24 = OpConstantNull %4 +%26 = OpTypePointer Input %3 +%25 = OpVariable %26 Input +%28 = OpConstant %3 0 +%30 = OpTypeBool +%33 = OpConstant %3 2 +%34 = OpConstant %3 264 +%37 = OpTypeFunction %2 +%41 = OpTypeInt 64 0 +%40 = OpConstant %41 256 +%42 = OpConstant %41 1024 +%64 = OpVariable %26 Input +%65 = OpConstant %3 64 +%66 = OpTypeStruct %5 +%67 = OpTypeArray %66 %11 +%68 = OpTypePointer Output %67 +%69 = OpVariable %68 Output +%70 = OpTypeArray %8 %13 +%71 = OpTypePointer Output %70 +%72 = OpVariable %71 Output +%75 = OpConstantNull %14 +%76 = OpVariable %26 Input +%86 = OpTypePointer Function %3 +%90 = OpTypePointer Workgroup %3 +%97 = OpTypePointer Workgroup %10 +%99 = OpTypePointer Workgroup %12 +%106 = OpTypePointer Workgroup %5 +%109 = OpTypePointer Output %5 +%119 = OpTypePointer Workgroup %8 +%122 = OpTypePointer Output %8 +%20 = OpFunction %8 None %21 %19 = OpLabel OpBranch %23 %23 = OpLabel -OpControlBarrier %24 %24 %25 -%26 = OpCompositeExtract %3 %22 0 -%27 = OpCompositeExtract %3 %22 1 -%28 = OpCompositeExtract %3 %22 2 -OpEmitMeshTasksEXT %26 %27 %28 %15 +%27 = OpLoad %3 %25 +%29 = OpIEqual %30 %27 %28 +OpSelectionMerge %31 None +OpBranchConditional %29 %32 %31 +%32 = OpLabel +OpStore %15 %24 +OpBranch %31 +%31 = OpLabel +OpControlBarrier %33 %33 %34 +OpBranch %35 +%35 = OpLabel +OpReturnValue %22 OpFunctionEnd -%44 = OpFunction %2 None %21 -%29 = OpLabel -%33 = OpVariable %35 Function -%34 = OpVariable %35 Function -%32 = OpLoad %3 %30 -OpBranch %45 -%45 = OpLabel -%49 = OpLoad %8 %47 -%53 = OpIEqual %51 %49 %50 -%54 = OpAll %52 %53 -OpSelectionMerge %55 None -OpBranchConditional %54 %56 %55 -%56 = OpLabel -OpStore %17 %46 -OpBranch %55 -%55 = OpLabel -OpControlBarrier %24 %24 %25 -OpBranch %57 -%57 = OpLabel -OpBranch %36 -%36 = OpLabel -OpControlBarrier %24 %24 %25 -%58 = OpAccessChain %59 %17 %24 -%60 = OpLoad %3 %58 -%61 = OpExtInst %3 %1 UMin %60 %11 -%62 = OpAccessChain %59 %17 %11 -%63 = OpLoad %3 %62 -%64 = OpExtInst %3 %1 UMin %63 %13 -%65 = OpAccessChain %66 %17 %67 -%68 = OpAccessChain %69 %17 %13 -OpControlBarrier %24 %24 %25 -OpSetMeshOutputsEXT %61 %64 -OpStore %33 %32 -OpBranch %70 -%70 = OpLabel -OpLoopMerge %72 %81 None -OpBranch %80 +%36 = OpFunction %2 None %37 +%38 = OpLabel +%39 = OpFunctionCall %8 %20 +OpControlBarrier %33 %33 %34 +%46 = OpCompositeExtract %3 %39 0 +%43 = OpUConvert %41 %46 +%47 = OpCompositeExtract %3 %39 1 +%44 = OpUConvert %41 %47 +%48 = OpCompositeExtract %3 %39 2 +%45 = OpUConvert %41 %48 +%49 = OpIMul %41 %43 %44 +%50 = OpIMul %41 %49 %45 +%51 = OpUGreaterThanEqual %30 %50 %42 +%52 = OpUGreaterThanEqual %30 %43 %40 +%53 = OpUGreaterThanEqual %30 %44 %40 +%54 = OpUGreaterThanEqual %30 %45 %40 +%55 = OpLogicalOr %30 %51 %52 +%56 = OpLogicalOr %30 %55 %53 +%57 = OpLogicalOr %30 %56 %54 +%58 = OpCompositeConstruct %8 %28 %28 %28 +%59 = OpSelect %8 %57 %58 %39 +%60 = OpCompositeExtract %3 %59 0 +%61 = OpCompositeExtract %3 %59 1 +%62 = OpCompositeExtract %3 %59 2 +OpEmitMeshTasksEXT %60 %61 %62 %15 +OpFunctionEnd +%73 = OpFunction %2 None %37 +%63 = OpLabel +OpBranch %74 +%74 = OpLabel +%77 = OpLoad %3 %76 +%78 = OpIEqual %30 %77 %28 +OpSelectionMerge %79 None +OpBranchConditional %78 %80 %79 %80 = OpLabel -%83 = OpLoad %3 %33 -%84 = OpULessThan %52 %83 %61 -OpBranchConditional %84 %82 %72 -%82 = OpLabel -%74 = OpLoad %3 %33 -%75 = OpAccessChain %76 %65 %74 %67 -%77 = OpLoad %5 %75 -%78 = OpAccessChain %79 %40 %74 %67 -OpStore %78 %77 +OpStore %17 %75 +OpBranch %79 +%79 = OpLabel +OpControlBarrier %33 %33 %34 OpBranch %81 %81 = OpLabel -%85 = OpLoad %3 %33 -%86 = OpIAdd %3 %85 %13 -OpStore %33 %86 -OpBranch %70 -%72 = OpLabel -OpStore %34 %32 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %94 None -OpBranch %93 -%93 = OpLabel -%96 = OpLoad %3 %34 -%97 = OpULessThan %52 %96 %64 -OpBranchConditional %97 %95 %73 -%95 = OpLabel -%87 = OpLoad %3 %34 -%88 = OpAccessChain %89 %68 %87 %67 -%90 = OpLoad %8 %88 -%91 = OpAccessChain %92 %43 %87 -OpStore %91 %90 -OpBranch %94 -%94 = OpLabel -%98 = OpLoad %3 %34 -%99 = OpIAdd %3 %98 %13 -OpStore %34 %99 -OpBranch %71 -%73 = OpLabel +OpReturn +OpFunctionEnd +%82 = OpFunction %2 None %37 +%83 = OpLabel +%84 = OpVariable %86 Function +%85 = OpVariable %86 Function +%87 = OpLoad %3 %64 +%88 = OpFunctionCall %2 %73 +OpControlBarrier %33 %33 %34 +%89 = OpAccessChain %90 %17 %33 +%91 = OpLoad %3 %89 +%92 = OpExtInst %3 %1 UMin %91 %11 +%93 = OpAccessChain %90 %17 %11 +%94 = OpLoad %3 %93 +%95 = OpExtInst %3 %1 UMin %94 %13 +%96 = OpAccessChain %97 %17 %28 +%98 = OpAccessChain %99 %17 %13 +OpSetMeshOutputsEXT %92 %95 +OpStore %84 %87 +OpBranch %100 +%100 = OpLabel +OpLoopMerge %102 %111 None +OpBranch %110 +%110 = OpLabel +%113 = OpLoad %3 %84 +%114 = OpULessThan %30 %113 %92 +OpBranchConditional %114 %112 %102 +%112 = OpLabel +%104 = OpLoad %3 %84 +%105 = OpAccessChain %106 %96 %104 %28 +%107 = OpLoad %5 %105 +%108 = OpAccessChain %109 %69 %104 %28 +OpStore %108 %107 +OpBranch %111 +%111 = OpLabel +%115 = OpLoad %3 %84 +%116 = OpIAdd %3 %115 %65 +OpStore %84 %116 +OpBranch %100 +%102 = OpLabel +OpStore %85 %87 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %124 None +OpBranch %123 +%123 = OpLabel +%126 = OpLoad %3 %85 +%127 = OpULessThan %30 %126 %95 +OpBranchConditional %127 %125 %103 +%125 = OpLabel +%117 = OpLoad %3 %85 +%118 = OpAccessChain %119 %98 %117 %28 +%120 = OpLoad %8 %118 +%121 = OpAccessChain %122 %72 %117 +OpStore %121 %120 +OpBranch %124 +%124 = OpLabel +%128 = OpLoad %3 %85 +%129 = OpIAdd %3 %128 %65 +OpStore %85 %129 +OpBranch %101 +%103 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm index 0bf21f6ada3..08ca704b5bf 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 101 +; Bound: 131 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %21 "ts_main" %16 -OpEntryPoint MeshEXT %44 "ms_main" %16 %30 %40 %43 %18 %47 -OpExecutionMode %21 LocalSize 1 1 1 -OpExecutionMode %44 LocalSize 1 1 1 -OpExecutionMode %44 OutputLinesEXT -OpExecutionMode %44 OutputVertices 2 -OpExecutionMode %44 OutputPrimitivesEXT 1 -OpDecorate %30 BuiltIn LocalInvocationIndex -OpMemberDecorate %37 0 BuiltIn Position -OpDecorate %37 Block -OpDecorate %43 BuiltIn PrimitiveLineIndicesEXT +OpEntryPoint TaskEXT %36 "ts_main" %16 %26 +OpEntryPoint MeshEXT %82 "ms_main" %16 %64 %69 %72 %18 %76 +OpExecutionMode %36 LocalSize 64 1 1 +OpExecutionMode %82 LocalSize 64 1 1 +OpExecutionMode %82 OutputLinesEXT +OpExecutionMode %82 OutputVertices 2 +OpExecutionMode %82 OutputPrimitivesEXT 1 +OpDecorate %64 BuiltIn LocalInvocationIndex +OpMemberDecorate %66 0 BuiltIn Position +OpDecorate %66 Block +OpDecorate %72 BuiltIn PrimitiveLineIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %9 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %15 0 Offset 0 OpMemberDecorate %15 1 Offset 32 OpMemberDecorate %15 2 Offset 40 OpMemberDecorate %15 3 Offset 44 -OpDecorate %47 BuiltIn LocalInvocationId +OpDecorate %26 BuiltIn LocalInvocationIndex +OpDecorate %76 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -46,119 +48,157 @@ OpDecorate %47 BuiltIn LocalInvocationId %16 = OpVariable %17 TaskPayloadWorkgroupEXT %19 = OpTypePointer Workgroup %15 %18 = OpVariable %19 Workgroup -%22 = OpTypeFunction %2 +%22 = OpTypeFunction %10 %23 = OpConstantComposite %10 %14 %14 %14 -%25 = OpConstant %3 264 -%31 = OpTypePointer Input %3 -%30 = OpVariable %31 Input -%35 = OpTypePointer Function %3 -%37 = OpTypeStruct %5 -%38 = OpTypeArray %37 %12 -%39 = OpTypePointer Output %38 -%40 = OpVariable %39 Output -%41 = OpTypeArray %8 %14 -%42 = OpTypePointer Output %41 -%43 = OpVariable %42 Output -%46 = OpConstantNull %15 -%48 = OpTypePointer Input %10 -%47 = OpVariable %48 Input -%50 = OpConstantNull %10 -%52 = OpTypeBool -%51 = OpTypeVector %52 3 -%59 = OpTypePointer Workgroup %3 -%63 = OpConstant %3 3 -%67 = OpTypePointer Workgroup %11 -%68 = OpConstant %3 0 -%70 = OpTypePointer Workgroup %13 -%77 = OpTypePointer Workgroup %5 -%80 = OpTypePointer Output %5 -%90 = OpTypePointer Workgroup %8 -%93 = OpTypePointer Output %8 -%21 = OpFunction %2 None %22 +%25 = OpConstantNull %4 +%27 = OpTypePointer Input %3 +%26 = OpVariable %27 Input +%29 = OpConstant %3 0 +%31 = OpTypeBool +%34 = OpConstant %3 264 +%37 = OpTypeFunction %2 +%41 = OpTypeInt 64 0 +%40 = OpConstant %41 256 +%42 = OpConstant %41 1024 +%64 = OpVariable %27 Input +%65 = OpConstant %3 64 +%66 = OpTypeStruct %5 +%67 = OpTypeArray %66 %12 +%68 = OpTypePointer Output %67 +%69 = OpVariable %68 Output +%70 = OpTypeArray %8 %14 +%71 = OpTypePointer Output %70 +%72 = OpVariable %71 Output +%75 = OpConstantNull %15 +%76 = OpVariable %27 Input +%86 = OpTypePointer Function %3 +%90 = OpTypePointer Workgroup %3 +%94 = OpConstant %3 3 +%98 = OpTypePointer Workgroup %11 +%100 = OpTypePointer Workgroup %13 +%107 = OpTypePointer Workgroup %5 +%110 = OpTypePointer Output %5 +%120 = OpTypePointer Workgroup %8 +%123 = OpTypePointer Output %8 +%21 = OpFunction %10 None %22 %20 = OpLabel OpBranch %24 %24 = OpLabel -OpControlBarrier %12 %12 %25 -%26 = OpCompositeExtract %3 %23 0 -%27 = OpCompositeExtract %3 %23 1 -%28 = OpCompositeExtract %3 %23 2 -OpEmitMeshTasksEXT %26 %27 %28 %16 +%28 = OpLoad %3 %26 +%30 = OpIEqual %31 %28 %29 +OpSelectionMerge %32 None +OpBranchConditional %30 %33 %32 +%33 = OpLabel +OpStore %16 %25 +OpBranch %32 +%32 = OpLabel +OpControlBarrier %12 %12 %34 +OpBranch %35 +%35 = OpLabel +OpReturnValue %23 OpFunctionEnd -%44 = OpFunction %2 None %22 -%29 = OpLabel -%33 = OpVariable %35 Function -%34 = OpVariable %35 Function -%32 = OpLoad %3 %30 -OpBranch %45 -%45 = OpLabel -%49 = OpLoad %10 %47 -%53 = OpIEqual %51 %49 %50 -%54 = OpAll %52 %53 -OpSelectionMerge %55 None -OpBranchConditional %54 %56 %55 -%56 = OpLabel -OpStore %18 %46 -OpBranch %55 -%55 = OpLabel -OpControlBarrier %12 %12 %25 -OpBranch %57 -%57 = OpLabel -OpBranch %36 -%36 = OpLabel -OpControlBarrier %12 %12 %25 -%58 = OpAccessChain %59 %18 %12 -%60 = OpLoad %3 %58 -%61 = OpExtInst %3 %1 UMin %60 %12 -%62 = OpAccessChain %59 %18 %63 -%64 = OpLoad %3 %62 -%65 = OpExtInst %3 %1 UMin %64 %14 -%66 = OpAccessChain %67 %18 %68 -%69 = OpAccessChain %70 %18 %14 -OpControlBarrier %12 %12 %25 -OpSetMeshOutputsEXT %61 %65 -OpStore %33 %32 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %82 None +%36 = OpFunction %2 None %37 +%38 = OpLabel +%39 = OpFunctionCall %10 %21 +OpControlBarrier %12 %12 %34 +%46 = OpCompositeExtract %3 %39 0 +%43 = OpUConvert %41 %46 +%47 = OpCompositeExtract %3 %39 1 +%44 = OpUConvert %41 %47 +%48 = OpCompositeExtract %3 %39 2 +%45 = OpUConvert %41 %48 +%49 = OpIMul %41 %43 %44 +%50 = OpIMul %41 %49 %45 +%51 = OpUGreaterThanEqual %31 %50 %42 +%52 = OpUGreaterThanEqual %31 %43 %40 +%53 = OpUGreaterThanEqual %31 %44 %40 +%54 = OpUGreaterThanEqual %31 %45 %40 +%55 = OpLogicalOr %31 %51 %52 +%56 = OpLogicalOr %31 %55 %53 +%57 = OpLogicalOr %31 %56 %54 +%58 = OpCompositeConstruct %10 %29 %29 %29 +%59 = OpSelect %10 %57 %58 %39 +%60 = OpCompositeExtract %3 %59 0 +%61 = OpCompositeExtract %3 %59 1 +%62 = OpCompositeExtract %3 %59 2 +OpEmitMeshTasksEXT %60 %61 %62 %16 +OpFunctionEnd +%73 = OpFunction %2 None %37 +%63 = OpLabel +OpBranch %74 +%74 = OpLabel +%77 = OpLoad %3 %76 +%78 = OpIEqual %31 %77 %29 +OpSelectionMerge %79 None +OpBranchConditional %78 %80 %79 +%80 = OpLabel +OpStore %18 %75 +OpBranch %79 +%79 = OpLabel +OpControlBarrier %12 %12 %34 OpBranch %81 %81 = OpLabel -%84 = OpLoad %3 %33 -%85 = OpULessThan %52 %84 %61 -OpBranchConditional %85 %83 %73 +OpReturn +OpFunctionEnd +%82 = OpFunction %2 None %37 %83 = OpLabel -%75 = OpLoad %3 %33 -%76 = OpAccessChain %77 %66 %75 %68 -%78 = OpLoad %5 %76 -%79 = OpAccessChain %80 %40 %75 %68 -OpStore %79 %78 -OpBranch %82 -%82 = OpLabel -%86 = OpLoad %3 %33 -%87 = OpIAdd %3 %86 %14 -OpStore %33 %87 -OpBranch %71 -%73 = OpLabel -OpStore %34 %32 -OpBranch %72 -%72 = OpLabel -OpLoopMerge %74 %95 None -OpBranch %94 -%94 = OpLabel -%97 = OpLoad %3 %34 -%98 = OpULessThan %52 %97 %65 -OpBranchConditional %98 %96 %74 -%96 = OpLabel -%88 = OpLoad %3 %34 -%89 = OpAccessChain %90 %69 %88 %68 -%91 = OpLoad %8 %89 -%92 = OpAccessChain %93 %43 %88 -OpStore %92 %91 -OpBranch %95 -%95 = OpLabel -%99 = OpLoad %3 %34 -%100 = OpIAdd %3 %99 %14 -OpStore %34 %100 -OpBranch %72 -%74 = OpLabel +%84 = OpVariable %86 Function +%85 = OpVariable %86 Function +%87 = OpLoad %3 %64 +%88 = OpFunctionCall %2 %73 +OpControlBarrier %12 %12 %34 +%89 = OpAccessChain %90 %18 %12 +%91 = OpLoad %3 %89 +%92 = OpExtInst %3 %1 UMin %91 %12 +%93 = OpAccessChain %90 %18 %94 +%95 = OpLoad %3 %93 +%96 = OpExtInst %3 %1 UMin %95 %14 +%97 = OpAccessChain %98 %18 %29 +%99 = OpAccessChain %100 %18 %14 +OpSetMeshOutputsEXT %92 %96 +OpStore %84 %87 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %112 None +OpBranch %111 +%111 = OpLabel +%114 = OpLoad %3 %84 +%115 = OpULessThan %31 %114 %92 +OpBranchConditional %115 %113 %103 +%113 = OpLabel +%105 = OpLoad %3 %84 +%106 = OpAccessChain %107 %97 %105 %29 +%108 = OpLoad %5 %106 +%109 = OpAccessChain %110 %69 %105 %29 +OpStore %109 %108 +OpBranch %112 +%112 = OpLabel +%116 = OpLoad %3 %84 +%117 = OpIAdd %3 %116 %65 +OpStore %84 %117 +OpBranch %101 +%103 = OpLabel +OpStore %85 %87 +OpBranch %102 +%102 = OpLabel +OpLoopMerge %104 %125 None +OpBranch %124 +%124 = OpLabel +%127 = OpLoad %3 %85 +%128 = OpULessThan %31 %127 %96 +OpBranchConditional %128 %126 %104 +%126 = OpLabel +%118 = OpLoad %3 %85 +%119 = OpAccessChain %120 %99 %118 %29 +%121 = OpLoad %8 %119 +%122 = OpAccessChain %123 %72 %118 +OpStore %122 %121 +OpBranch %125 +%125 = OpLabel +%129 = OpLoad %3 %85 +%130 = OpIAdd %3 %129 %65 +OpStore %85 %130 +OpBranch %102 +%104 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm index 40417f67011..2db0562c267 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 99 +; Bound: 129 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %19 "ts_main" %14 -OpEntryPoint MeshEXT %43 "ms_main" %14 %29 %39 %42 %16 %46 -OpExecutionMode %19 LocalSize 1 1 1 -OpExecutionMode %43 LocalSize 1 1 1 -OpExecutionMode %43 OutputPoints -OpExecutionMode %43 OutputVertices 1 -OpExecutionMode %43 OutputPrimitivesEXT 1 -OpDecorate %29 BuiltIn LocalInvocationIndex -OpMemberDecorate %36 0 BuiltIn Position -OpDecorate %36 Block -OpDecorate %42 BuiltIn PrimitivePointIndicesEXT +OpEntryPoint TaskEXT %35 "ts_main" %14 %24 +OpEntryPoint MeshEXT %81 "ms_main" %14 %63 %68 %71 %16 %75 +OpExecutionMode %35 LocalSize 64 1 1 +OpExecutionMode %81 LocalSize 64 1 1 +OpExecutionMode %81 OutputPoints +OpExecutionMode %81 OutputVertices 1 +OpExecutionMode %81 OutputPrimitivesEXT 1 +OpDecorate %63 BuiltIn LocalInvocationIndex +OpMemberDecorate %65 0 BuiltIn Position +OpDecorate %65 Block +OpDecorate %71 BuiltIn PrimitivePointIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %8 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %13 0 Offset 0 OpMemberDecorate %13 1 Offset 16 OpMemberDecorate %13 2 Offset 20 OpMemberDecorate %13 3 Offset 24 -OpDecorate %46 BuiltIn LocalInvocationId +OpDecorate %24 BuiltIn LocalInvocationIndex +OpDecorate %75 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -44,119 +46,157 @@ OpDecorate %46 BuiltIn LocalInvocationId %14 = OpVariable %15 TaskPayloadWorkgroupEXT %17 = OpTypePointer Workgroup %13 %16 = OpVariable %17 Workgroup -%20 = OpTypeFunction %2 +%20 = OpTypeFunction %9 %21 = OpConstantComposite %9 %11 %11 %11 -%23 = OpConstant %3 2 -%24 = OpConstant %3 264 -%30 = OpTypePointer Input %3 -%29 = OpVariable %30 Input -%34 = OpTypePointer Function %3 -%36 = OpTypeStruct %5 -%37 = OpTypeArray %36 %11 -%38 = OpTypePointer Output %37 -%39 = OpVariable %38 Output -%40 = OpTypeArray %3 %11 -%41 = OpTypePointer Output %40 -%42 = OpVariable %41 Output -%45 = OpConstantNull %13 -%47 = OpTypePointer Input %9 -%46 = OpVariable %47 Input -%49 = OpConstantNull %9 -%51 = OpTypeBool -%50 = OpTypeVector %51 3 -%58 = OpTypePointer Workgroup %3 -%62 = OpConstant %3 3 -%66 = OpTypePointer Workgroup %10 -%67 = OpConstant %3 0 -%69 = OpTypePointer Workgroup %12 -%76 = OpTypePointer Workgroup %5 -%79 = OpTypePointer Output %5 -%91 = OpTypePointer Output %3 -%19 = OpFunction %2 None %20 +%23 = OpConstantNull %4 +%25 = OpTypePointer Input %3 +%24 = OpVariable %25 Input +%27 = OpConstant %3 0 +%29 = OpTypeBool +%32 = OpConstant %3 2 +%33 = OpConstant %3 264 +%36 = OpTypeFunction %2 +%40 = OpTypeInt 64 0 +%39 = OpConstant %40 256 +%41 = OpConstant %40 1024 +%63 = OpVariable %25 Input +%64 = OpConstant %3 64 +%65 = OpTypeStruct %5 +%66 = OpTypeArray %65 %11 +%67 = OpTypePointer Output %66 +%68 = OpVariable %67 Output +%69 = OpTypeArray %3 %11 +%70 = OpTypePointer Output %69 +%71 = OpVariable %70 Output +%74 = OpConstantNull %13 +%75 = OpVariable %25 Input +%85 = OpTypePointer Function %3 +%89 = OpTypePointer Workgroup %3 +%93 = OpConstant %3 3 +%97 = OpTypePointer Workgroup %10 +%99 = OpTypePointer Workgroup %12 +%106 = OpTypePointer Workgroup %5 +%109 = OpTypePointer Output %5 +%121 = OpTypePointer Output %3 +%19 = OpFunction %9 None %20 %18 = OpLabel OpBranch %22 %22 = OpLabel -OpControlBarrier %23 %23 %24 -%25 = OpCompositeExtract %3 %21 0 -%26 = OpCompositeExtract %3 %21 1 -%27 = OpCompositeExtract %3 %21 2 -OpEmitMeshTasksEXT %25 %26 %27 %14 +%26 = OpLoad %3 %24 +%28 = OpIEqual %29 %26 %27 +OpSelectionMerge %30 None +OpBranchConditional %28 %31 %30 +%31 = OpLabel +OpStore %14 %23 +OpBranch %30 +%30 = OpLabel +OpControlBarrier %32 %32 %33 +OpBranch %34 +%34 = OpLabel +OpReturnValue %21 OpFunctionEnd -%43 = OpFunction %2 None %20 -%28 = OpLabel -%32 = OpVariable %34 Function -%33 = OpVariable %34 Function -%31 = OpLoad %3 %29 -OpBranch %44 -%44 = OpLabel -%48 = OpLoad %9 %46 -%52 = OpIEqual %50 %48 %49 -%53 = OpAll %51 %52 -OpSelectionMerge %54 None -OpBranchConditional %53 %55 %54 -%55 = OpLabel -OpStore %16 %45 -OpBranch %54 -%54 = OpLabel -OpControlBarrier %23 %23 %24 -OpBranch %56 -%56 = OpLabel -OpBranch %35 -%35 = OpLabel -OpControlBarrier %23 %23 %24 -%57 = OpAccessChain %58 %16 %23 -%59 = OpLoad %3 %57 -%60 = OpExtInst %3 %1 UMin %59 %11 -%61 = OpAccessChain %58 %16 %62 -%63 = OpLoad %3 %61 -%64 = OpExtInst %3 %1 UMin %63 %11 -%65 = OpAccessChain %66 %16 %67 -%68 = OpAccessChain %69 %16 %11 -OpControlBarrier %23 %23 %24 -OpSetMeshOutputsEXT %60 %64 -OpStore %32 %31 -OpBranch %70 -%70 = OpLabel -OpLoopMerge %72 %81 None +%35 = OpFunction %2 None %36 +%37 = OpLabel +%38 = OpFunctionCall %9 %19 +OpControlBarrier %32 %32 %33 +%45 = OpCompositeExtract %3 %38 0 +%42 = OpUConvert %40 %45 +%46 = OpCompositeExtract %3 %38 1 +%43 = OpUConvert %40 %46 +%47 = OpCompositeExtract %3 %38 2 +%44 = OpUConvert %40 %47 +%48 = OpIMul %40 %42 %43 +%49 = OpIMul %40 %48 %44 +%50 = OpUGreaterThanEqual %29 %49 %41 +%51 = OpUGreaterThanEqual %29 %42 %39 +%52 = OpUGreaterThanEqual %29 %43 %39 +%53 = OpUGreaterThanEqual %29 %44 %39 +%54 = OpLogicalOr %29 %50 %51 +%55 = OpLogicalOr %29 %54 %52 +%56 = OpLogicalOr %29 %55 %53 +%57 = OpCompositeConstruct %9 %27 %27 %27 +%58 = OpSelect %9 %56 %57 %38 +%59 = OpCompositeExtract %3 %58 0 +%60 = OpCompositeExtract %3 %58 1 +%61 = OpCompositeExtract %3 %58 2 +OpEmitMeshTasksEXT %59 %60 %61 %14 +OpFunctionEnd +%72 = OpFunction %2 None %36 +%62 = OpLabel +OpBranch %73 +%73 = OpLabel +%76 = OpLoad %3 %75 +%77 = OpIEqual %29 %76 %27 +OpSelectionMerge %78 None +OpBranchConditional %77 %79 %78 +%79 = OpLabel +OpStore %16 %74 +OpBranch %78 +%78 = OpLabel +OpControlBarrier %32 %32 %33 OpBranch %80 %80 = OpLabel -%83 = OpLoad %3 %32 -%84 = OpULessThan %51 %83 %60 -OpBranchConditional %84 %82 %72 +OpReturn +OpFunctionEnd +%81 = OpFunction %2 None %36 %82 = OpLabel -%74 = OpLoad %3 %32 -%75 = OpAccessChain %76 %65 %74 %67 -%77 = OpLoad %5 %75 -%78 = OpAccessChain %79 %39 %74 %67 -OpStore %78 %77 -OpBranch %81 -%81 = OpLabel -%85 = OpLoad %3 %32 -%86 = OpIAdd %3 %85 %11 -OpStore %32 %86 -OpBranch %70 -%72 = OpLabel -OpStore %33 %31 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %93 None -OpBranch %92 -%92 = OpLabel -%95 = OpLoad %3 %33 -%96 = OpULessThan %51 %95 %64 -OpBranchConditional %96 %94 %73 -%94 = OpLabel -%87 = OpLoad %3 %33 -%88 = OpAccessChain %58 %68 %87 %67 -%89 = OpLoad %3 %88 -%90 = OpAccessChain %91 %42 %87 -OpStore %90 %89 -OpBranch %93 -%93 = OpLabel -%97 = OpLoad %3 %33 -%98 = OpIAdd %3 %97 %11 -OpStore %33 %98 -OpBranch %71 -%73 = OpLabel +%83 = OpVariable %85 Function +%84 = OpVariable %85 Function +%86 = OpLoad %3 %63 +%87 = OpFunctionCall %2 %72 +OpControlBarrier %32 %32 %33 +%88 = OpAccessChain %89 %16 %32 +%90 = OpLoad %3 %88 +%91 = OpExtInst %3 %1 UMin %90 %11 +%92 = OpAccessChain %89 %16 %93 +%94 = OpLoad %3 %92 +%95 = OpExtInst %3 %1 UMin %94 %11 +%96 = OpAccessChain %97 %16 %27 +%98 = OpAccessChain %99 %16 %11 +OpSetMeshOutputsEXT %91 %95 +OpStore %83 %86 +OpBranch %100 +%100 = OpLabel +OpLoopMerge %102 %111 None +OpBranch %110 +%110 = OpLabel +%113 = OpLoad %3 %83 +%114 = OpULessThan %29 %113 %91 +OpBranchConditional %114 %112 %102 +%112 = OpLabel +%104 = OpLoad %3 %83 +%105 = OpAccessChain %106 %96 %104 %27 +%107 = OpLoad %5 %105 +%108 = OpAccessChain %109 %68 %104 %27 +OpStore %108 %107 +OpBranch %111 +%111 = OpLabel +%115 = OpLoad %3 %83 +%116 = OpIAdd %3 %115 %64 +OpStore %83 %116 +OpBranch %100 +%102 = OpLabel +OpStore %84 %86 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %123 None +OpBranch %122 +%122 = OpLabel +%125 = OpLoad %3 %84 +%126 = OpULessThan %29 %125 %95 +OpBranchConditional %126 %124 %103 +%124 = OpLabel +%117 = OpLoad %3 %84 +%118 = OpAccessChain %89 %98 %117 %27 +%119 = OpLoad %3 %118 +%120 = OpAccessChain %121 %71 %117 +OpStore %120 %119 +OpBranch %123 +%123 = OpLabel +%127 = OpLoad %3 %84 +%128 = OpIAdd %3 %127 %64 +OpStore %84 %128 +OpBranch %101 +%103 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader.spvasm b/naga/tests/out/spv/wgsl-mesh-shader.spvasm index 4fe27cf3601..973812153f3 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader.spvasm @@ -1,65 +1,67 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 384 +; Bound: 449 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %24 "ts_main" %17 %19 %33 -OpEntryPoint TaskEXT %57 "ts_divergent" %54 %17 -OpEntryPoint MeshEXT %95 "ms_main" %17 %72 %81 %85 %88 %91 %94 %19 %21 %107 -OpEntryPoint MeshEXT %214 "ms_no_ts" %192 %200 %204 %207 %210 %213 %19 %21 %217 -OpEntryPoint MeshEXT %301 "ms_divergent" %279 %287 %291 %294 %297 %300 %19 %21 %303 -OpEntryPoint Fragment %379 "fs_main" %370 %373 %376 %378 -OpExecutionMode %24 LocalSize 1 1 1 -OpExecutionMode %57 LocalSize 2 1 1 -OpExecutionMode %95 LocalSize 1 1 1 -OpExecutionMode %95 OutputTrianglesEXT -OpExecutionMode %95 OutputVertices 3 -OpExecutionMode %95 OutputPrimitivesEXT 1 -OpExecutionMode %214 LocalSize 1 1 1 -OpExecutionMode %214 OutputTrianglesEXT -OpExecutionMode %214 OutputVertices 3 -OpExecutionMode %214 OutputPrimitivesEXT 1 -OpExecutionMode %301 LocalSize 1 1 1 -OpExecutionMode %301 OutputTrianglesEXT -OpExecutionMode %301 OutputVertices 3 -OpExecutionMode %301 OutputPrimitivesEXT 1 -OpExecutionMode %379 OriginUpperLeft -OpDecorate %72 BuiltIn LocalInvocationIndex -OpMemberDecorate %78 0 BuiltIn Position -OpDecorate %78 Block -OpMemberDecorate %82 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %82 0 PerPrimitiveEXT -OpDecorate %82 Block -OpDecorate %85 PerPrimitiveEXT -OpDecorate %88 Location 0 -OpDecorate %91 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %94 Location 1 -OpDecorate %94 PerPrimitiveEXT -OpDecorate %192 BuiltIn LocalInvocationIndex -OpMemberDecorate %197 0 BuiltIn Position -OpDecorate %197 Block -OpMemberDecorate %201 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %201 0 PerPrimitiveEXT -OpDecorate %201 Block -OpDecorate %204 PerPrimitiveEXT -OpDecorate %207 Location 0 -OpDecorate %210 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %213 Location 1 -OpDecorate %213 PerPrimitiveEXT -OpMemberDecorate %284 0 BuiltIn Position -OpDecorate %284 Block -OpMemberDecorate %288 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %288 0 PerPrimitiveEXT -OpDecorate %288 Block -OpDecorate %291 PerPrimitiveEXT -OpDecorate %294 Location 0 -OpDecorate %297 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %300 Location 1 -OpDecorate %300 PerPrimitiveEXT +OpEntryPoint TaskEXT %62 "ts_main" %17 %19 %47 +OpEntryPoint TaskEXT %108 "ts_divergent" %90 %17 %96 +OpEntryPoint MeshEXT %198 "ms_main" %17 %132 %136 %140 %143 %146 %149 %19 %21 %162 +OpEntryPoint MeshEXT %290 "ms_no_ts" %252 %256 %260 %263 %266 %269 %19 %21 %273 +OpEntryPoint MeshEXT %384 "ms_divergent" %340 %342 %346 %350 %353 %356 %359 %19 %21 %362 +OpEntryPoint Fragment %444 "fs_main" %435 %438 %441 %443 +OpExecutionMode %62 LocalSize 1 1 1 +OpExecutionMode %108 LocalSize 2 1 1 +OpExecutionMode %198 LocalSize 1 1 1 +OpExecutionMode %198 OutputTrianglesEXT +OpExecutionMode %198 OutputVertices 3 +OpExecutionMode %198 OutputPrimitivesEXT 1 +OpExecutionMode %290 LocalSize 1 1 1 +OpExecutionMode %290 OutputTrianglesEXT +OpExecutionMode %290 OutputVertices 3 +OpExecutionMode %290 OutputPrimitivesEXT 1 +OpExecutionMode %384 LocalSize 2 1 1 +OpExecutionMode %384 OutputTrianglesEXT +OpExecutionMode %384 OutputVertices 3 +OpExecutionMode %384 OutputPrimitivesEXT 1 +OpExecutionMode %444 OriginUpperLeft +OpDecorate %132 BuiltIn LocalInvocationIndex +OpMemberDecorate %133 0 BuiltIn Position +OpDecorate %133 Block +OpMemberDecorate %137 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %137 0 PerPrimitiveEXT +OpDecorate %137 Block +OpDecorate %140 PerPrimitiveEXT +OpDecorate %143 Location 0 +OpDecorate %146 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %149 Location 1 +OpDecorate %149 PerPrimitiveEXT +OpDecorate %252 BuiltIn LocalInvocationIndex +OpMemberDecorate %253 0 BuiltIn Position +OpDecorate %253 Block +OpMemberDecorate %257 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %257 0 PerPrimitiveEXT +OpDecorate %257 Block +OpDecorate %260 PerPrimitiveEXT +OpDecorate %263 Location 0 +OpDecorate %266 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %269 Location 1 +OpDecorate %269 PerPrimitiveEXT +OpDecorate %342 BuiltIn LocalInvocationIndex +OpMemberDecorate %343 0 BuiltIn Position +OpDecorate %343 Block +OpMemberDecorate %347 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %347 0 PerPrimitiveEXT +OpDecorate %347 Block +OpDecorate %350 PerPrimitiveEXT +OpDecorate %353 Location 0 +OpDecorate %356 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %359 Location 1 +OpDecorate %359 PerPrimitiveEXT OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 16 OpMemberDecorate %7 0 Offset 0 @@ -74,17 +76,18 @@ OpMemberDecorate %16 0 Offset 0 OpMemberDecorate %16 1 Offset 96 OpMemberDecorate %16 2 Offset 128 OpMemberDecorate %16 3 Offset 132 -OpDecorate %33 BuiltIn LocalInvocationId -OpDecorate %54 BuiltIn LocalInvocationIndex -OpDecorate %107 BuiltIn LocalInvocationId -OpDecorate %217 BuiltIn LocalInvocationId -OpDecorate %279 BuiltIn LocalInvocationIndex -OpDecorate %303 BuiltIn LocalInvocationId -OpDecorate %370 BuiltIn FragCoord -OpDecorate %373 Location 0 -OpDecorate %376 Location 1 -OpDecorate %376 PerPrimitiveEXT -OpDecorate %378 Location 0 +OpDecorate %47 BuiltIn LocalInvocationIndex +OpDecorate %90 BuiltIn LocalInvocationId +OpDecorate %96 BuiltIn LocalInvocationIndex +OpDecorate %162 BuiltIn LocalInvocationIndex +OpDecorate %273 BuiltIn LocalInvocationIndex +OpDecorate %340 BuiltIn LocalInvocationId +OpDecorate %362 BuiltIn LocalInvocationIndex +OpDecorate %435 BuiltIn FragCoord +OpDecorate %438 Location 0 +OpDecorate %441 Location 1 +OpDecorate %441 PerPrimitiveEXT +OpDecorate %443 Location 0 %2 = OpTypeVoid %3 = OpTypeFloat 32 %4 = OpTypeVector %3 4 @@ -106,536 +109,618 @@ OpDecorate %378 Location 0 %19 = OpVariable %20 Workgroup %22 = OpTypePointer Workgroup %16 %21 = OpVariable %22 Workgroup -%25 = OpTypeFunction %2 -%26 = OpConstant %3 1 -%27 = OpConstant %3 0 -%28 = OpConstantComposite %4 %26 %26 %27 %26 -%29 = OpConstantTrue %5 -%30 = OpConstantComposite %9 %15 %15 %15 -%32 = OpConstantNull %3 -%34 = OpTypePointer Input %9 -%33 = OpVariable %34 Input -%36 = OpConstantNull %9 -%37 = OpTypeVector %5 3 -%42 = OpConstant %8 2 -%43 = OpConstant %8 264 -%45 = OpTypePointer TaskPayloadWorkgroupEXT %4 -%46 = OpConstant %8 0 -%48 = OpTypePointer TaskPayloadWorkgroupEXT %5 -%55 = OpTypePointer Input %8 -%54 = OpVariable %55 Input -%58 = OpConstantComposite %9 %42 %42 %42 -%72 = OpVariable %55 Input -%76 = OpTypePointer Function %8 -%78 = OpTypeStruct %4 -%79 = OpTypeArray %78 %13 -%80 = OpTypePointer Output %79 -%81 = OpVariable %80 Output -%82 = OpTypeStruct %5 -%83 = OpTypeArray %82 %15 -%84 = OpTypePointer Output %83 -%85 = OpVariable %84 Output -%86 = OpTypeArray %4 %13 -%87 = OpTypePointer Output %86 -%88 = OpVariable %87 Output -%89 = OpTypeArray %9 %15 -%90 = OpTypePointer Output %89 -%91 = OpVariable %90 Output -%92 = OpTypeArray %4 %15 -%93 = OpTypePointer Output %92 -%94 = OpVariable %93 Output -%96 = OpConstant %3 2 -%97 = OpConstantComposite %4 %27 %26 %27 %26 -%98 = OpConstant %3 -1 -%99 = OpConstantComposite %4 %98 %98 %27 %26 -%100 = OpConstantComposite %4 %27 %27 %26 %26 -%101 = OpConstantComposite %4 %26 %98 %27 %26 -%102 = OpConstantComposite %4 %26 %27 %27 %26 -%103 = OpConstantComposite %9 %46 %15 %42 -%104 = OpConstantComposite %4 %26 %27 %26 %26 -%106 = OpConstantNull %16 -%107 = OpVariable %34 Input -%114 = OpTypePointer Workgroup %8 -%117 = OpTypePointer Workgroup %12 -%118 = OpTypePointer Workgroup %7 -%119 = OpTypePointer Workgroup %4 -%135 = OpTypePointer Workgroup %14 -%136 = OpTypePointer Workgroup %10 -%137 = OpTypePointer Workgroup %9 -%139 = OpTypePointer Workgroup %5 -%161 = OpTypePointer Output %4 -%176 = OpTypePointer Output %9 -%180 = OpTypePointer Output %5 -%192 = OpVariable %55 Input -%197 = OpTypeStruct %4 -%198 = OpTypeArray %197 %13 -%199 = OpTypePointer Output %198 -%200 = OpVariable %199 Output -%201 = OpTypeStruct %5 -%202 = OpTypeArray %201 %15 -%203 = OpTypePointer Output %202 -%204 = OpVariable %203 Output -%205 = OpTypeArray %4 %13 -%206 = OpTypePointer Output %205 -%207 = OpVariable %206 Output -%208 = OpTypeArray %9 %15 -%209 = OpTypePointer Output %208 -%210 = OpVariable %209 Output -%211 = OpTypeArray %4 %15 -%212 = OpTypePointer Output %211 -%213 = OpVariable %212 Output -%215 = OpConstantFalse %5 -%217 = OpVariable %34 Input -%279 = OpVariable %55 Input -%284 = OpTypeStruct %4 -%285 = OpTypeArray %284 %13 -%286 = OpTypePointer Output %285 -%287 = OpVariable %286 Output -%288 = OpTypeStruct %5 -%289 = OpTypeArray %288 %15 -%290 = OpTypePointer Output %289 -%291 = OpVariable %290 Output -%292 = OpTypeArray %4 %13 -%293 = OpTypePointer Output %292 -%294 = OpVariable %293 Output -%295 = OpTypeArray %9 %15 -%296 = OpTypePointer Output %295 -%297 = OpVariable %296 Output -%298 = OpTypeArray %4 %15 -%299 = OpTypePointer Output %298 -%300 = OpVariable %299 Output -%303 = OpVariable %34 Input -%371 = OpTypePointer Input %4 -%370 = OpVariable %371 Input -%373 = OpVariable %371 Input -%376 = OpVariable %371 Input -%378 = OpVariable %161 Output -%24 = OpFunction %2 None %25 +%25 = OpTypeFunction %5 +%27 = OpTypePointer TaskPayloadWorkgroupEXT %5 +%33 = OpTypeFunction %2 %5 +%38 = OpTypeFunction %9 +%39 = OpConstant %3 1 +%40 = OpConstant %3 0 +%41 = OpConstantComposite %4 %39 %39 %40 %39 +%42 = OpConstantTrue %5 +%43 = OpConstantComposite %9 %15 %15 %15 +%45 = OpConstantNull %6 +%46 = OpConstantNull %3 +%48 = OpTypePointer Input %8 +%47 = OpVariable %48 Input +%50 = OpConstant %8 0 +%54 = OpConstant %8 2 +%55 = OpConstant %8 264 +%57 = OpTypePointer TaskPayloadWorkgroupEXT %4 +%63 = OpTypeFunction %2 +%67 = OpTypeInt 64 0 +%66 = OpConstant %67 256 +%68 = OpConstant %67 1024 +%91 = OpTypePointer Input %9 +%90 = OpVariable %91 Input +%94 = OpConstantComposite %9 %54 %54 %54 +%96 = OpVariable %48 Input +%132 = OpVariable %48 Input +%133 = OpTypeStruct %4 +%134 = OpTypeArray %133 %13 +%135 = OpTypePointer Output %134 +%136 = OpVariable %135 Output +%137 = OpTypeStruct %5 +%138 = OpTypeArray %137 %15 +%139 = OpTypePointer Output %138 +%140 = OpVariable %139 Output +%141 = OpTypeArray %4 %13 +%142 = OpTypePointer Output %141 +%143 = OpVariable %142 Output +%144 = OpTypeArray %9 %15 +%145 = OpTypePointer Output %144 +%146 = OpVariable %145 Output +%147 = OpTypeArray %4 %15 +%148 = OpTypePointer Output %147 +%149 = OpVariable %148 Output +%151 = OpConstant %3 2 +%152 = OpConstantComposite %4 %40 %39 %40 %39 +%153 = OpConstant %3 -1 +%154 = OpConstantComposite %4 %153 %153 %40 %39 +%155 = OpConstantComposite %4 %40 %40 %39 %39 +%156 = OpConstantComposite %4 %39 %153 %40 %39 +%157 = OpConstantComposite %4 %39 %40 %40 %39 +%158 = OpConstantComposite %9 %50 %15 %54 +%159 = OpConstantComposite %4 %39 %40 %39 %39 +%161 = OpConstantNull %16 +%162 = OpVariable %48 Input +%168 = OpTypePointer Workgroup %8 +%171 = OpTypePointer Workgroup %12 +%172 = OpTypePointer Workgroup %7 +%173 = OpTypePointer Workgroup %4 +%189 = OpTypePointer Workgroup %14 +%190 = OpTypePointer Workgroup %10 +%191 = OpTypePointer Workgroup %9 +%193 = OpTypePointer Workgroup %5 +%202 = OpTypePointer Function %8 +%221 = OpTypePointer Output %4 +%236 = OpTypePointer Output %9 +%240 = OpTypePointer Output %5 +%252 = OpVariable %48 Input +%253 = OpTypeStruct %4 +%254 = OpTypeArray %253 %13 +%255 = OpTypePointer Output %254 +%256 = OpVariable %255 Output +%257 = OpTypeStruct %5 +%258 = OpTypeArray %257 %15 +%259 = OpTypePointer Output %258 +%260 = OpVariable %259 Output +%261 = OpTypeArray %4 %13 +%262 = OpTypePointer Output %261 +%263 = OpVariable %262 Output +%264 = OpTypeArray %9 %15 +%265 = OpTypePointer Output %264 +%266 = OpVariable %265 Output +%267 = OpTypeArray %4 %15 +%268 = OpTypePointer Output %267 +%269 = OpVariable %268 Output +%271 = OpConstantFalse %5 +%273 = OpVariable %48 Input +%340 = OpVariable %91 Input +%342 = OpVariable %48 Input +%343 = OpTypeStruct %4 +%344 = OpTypeArray %343 %13 +%345 = OpTypePointer Output %344 +%346 = OpVariable %345 Output +%347 = OpTypeStruct %5 +%348 = OpTypeArray %347 %15 +%349 = OpTypePointer Output %348 +%350 = OpVariable %349 Output +%351 = OpTypeArray %4 %13 +%352 = OpTypePointer Output %351 +%353 = OpVariable %352 Output +%354 = OpTypeArray %9 %15 +%355 = OpTypePointer Output %354 +%356 = OpVariable %355 Output +%357 = OpTypeArray %4 %15 +%358 = OpTypePointer Output %357 +%359 = OpVariable %358 Output +%362 = OpVariable %48 Input +%436 = OpTypePointer Input %4 +%435 = OpVariable %436 Input +%438 = OpVariable %436 Input +%441 = OpVariable %436 Input +%443 = OpVariable %221 Output +%24 = OpFunction %5 None %25 %23 = OpLabel -OpBranch %31 -%31 = OpLabel -%35 = OpLoad %9 %33 -%38 = OpIEqual %37 %35 %36 -%39 = OpAll %5 %38 -OpSelectionMerge %40 None -OpBranchConditional %39 %41 %40 -%41 = OpLabel -OpStore %19 %32 -OpBranch %40 -%40 = OpLabel -OpControlBarrier %42 %42 %43 +OpBranch %26 +%26 = OpLabel +%28 = OpAccessChain %27 %17 %15 +%29 = OpLoad %5 %28 +OpReturnValue %29 +OpFunctionEnd +%32 = OpFunction %2 None %33 +%31 = OpFunctionParameter %5 +%30 = OpLabel +OpBranch %34 +%34 = OpLabel +%35 = OpAccessChain %27 %17 %15 +OpStore %35 %31 +OpReturn +OpFunctionEnd +%37 = OpFunction %9 None %38 +%36 = OpLabel OpBranch %44 %44 = OpLabel -OpStore %19 %26 -%47 = OpAccessChain %45 %17 %46 -OpStore %47 %28 -%49 = OpAccessChain %48 %17 %15 -OpStore %49 %29 -OpControlBarrier %42 %42 %43 -%50 = OpCompositeExtract %8 %30 0 -%51 = OpCompositeExtract %8 %30 1 -%52 = OpCompositeExtract %8 %30 2 -OpEmitMeshTasksEXT %50 %51 %52 %17 -OpFunctionEnd -%57 = OpFunction %2 None %25 +%49 = OpLoad %8 %47 +%51 = OpIEqual %5 %49 %50 +OpSelectionMerge %52 None +OpBranchConditional %51 %53 %52 %53 = OpLabel -%56 = OpLoad %8 %54 -OpBranch %59 -%59 = OpLabel -%60 = OpIEqual %5 %56 %46 -OpSelectionMerge %61 None -OpBranchConditional %60 %62 %61 -%62 = OpLabel -%63 = OpAccessChain %45 %17 %46 -OpStore %63 %28 -%64 = OpAccessChain %48 %17 %15 -OpStore %64 %29 -OpControlBarrier %42 %42 %43 -%65 = OpCompositeExtract %8 %30 0 -%66 = OpCompositeExtract %8 %30 1 -%67 = OpCompositeExtract %8 %30 2 -OpEmitMeshTasksEXT %65 %66 %67 %17 -%61 = OpLabel -OpControlBarrier %42 %42 %43 -%68 = OpCompositeExtract %8 %58 0 -%69 = OpCompositeExtract %8 %58 1 -%70 = OpCompositeExtract %8 %58 2 -OpEmitMeshTasksEXT %68 %69 %70 %17 +OpStore %17 %45 +OpStore %19 %46 +OpBranch %52 +%52 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %56 +%56 = OpLabel +OpStore %19 %39 +%58 = OpAccessChain %57 %17 %50 +OpStore %58 %41 +%59 = OpFunctionCall %2 %32 %42 +%60 = OpFunctionCall %5 %24 +%61 = OpAccessChain %27 %17 %15 +OpStore %61 %60 +OpReturnValue %43 OpFunctionEnd -%95 = OpFunction %2 None %25 -%71 = OpLabel -%74 = OpVariable %76 Function -%75 = OpVariable %76 Function -%73 = OpLoad %8 %72 -OpBranch %105 +%62 = OpFunction %2 None %63 +%64 = OpLabel +%65 = OpFunctionCall %9 %37 +OpControlBarrier %54 %54 %55 +%72 = OpCompositeExtract %8 %65 0 +%69 = OpUConvert %67 %72 +%73 = OpCompositeExtract %8 %65 1 +%70 = OpUConvert %67 %73 +%74 = OpCompositeExtract %8 %65 2 +%71 = OpUConvert %67 %74 +%75 = OpIMul %67 %69 %70 +%76 = OpIMul %67 %75 %71 +%77 = OpUGreaterThanEqual %5 %76 %68 +%78 = OpUGreaterThanEqual %5 %69 %66 +%79 = OpUGreaterThanEqual %5 %70 %66 +%80 = OpUGreaterThanEqual %5 %71 %66 +%81 = OpLogicalOr %5 %77 %78 +%82 = OpLogicalOr %5 %81 %79 +%83 = OpLogicalOr %5 %82 %80 +%84 = OpCompositeConstruct %9 %50 %50 %50 +%85 = OpSelect %9 %83 %84 %65 +%86 = OpCompositeExtract %8 %85 0 +%87 = OpCompositeExtract %8 %85 1 +%88 = OpCompositeExtract %8 %85 2 +OpEmitMeshTasksEXT %86 %87 %88 %17 +OpFunctionEnd +%93 = OpFunction %9 None %38 +%89 = OpLabel +%92 = OpLoad %9 %90 +OpBranch %95 +%95 = OpLabel +%97 = OpLoad %8 %96 +%98 = OpIEqual %5 %97 %50 +OpSelectionMerge %99 None +OpBranchConditional %98 %100 %99 +%100 = OpLabel +OpStore %17 %45 +OpBranch %99 +%99 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %101 +%101 = OpLabel +%102 = OpCompositeExtract %8 %92 0 +%103 = OpIEqual %5 %102 %50 +OpSelectionMerge %104 None +OpBranchConditional %103 %105 %104 %105 = OpLabel -%108 = OpLoad %9 %107 -%109 = OpIEqual %37 %108 %36 -%110 = OpAll %5 %109 -OpSelectionMerge %111 None -OpBranchConditional %110 %112 %111 -%112 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %111 -%111 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %113 -%113 = OpLabel -%115 = OpAccessChain %114 %21 %42 -OpStore %115 %13 -%116 = OpAccessChain %114 %21 %13 -OpStore %116 %15 -OpStore %19 %96 -%120 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %120 %97 -%121 = OpAccessChain %45 %17 %46 -%122 = OpLoad %4 %121 -%123 = OpFMul %4 %97 %122 -%124 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %124 %123 -%125 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %125 %99 -%126 = OpAccessChain %45 %17 %46 -%127 = OpLoad %4 %126 -%128 = OpFMul %4 %100 %127 -%129 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %129 %128 -%130 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %130 %101 -%131 = OpAccessChain %45 %17 %46 -%132 = OpLoad %4 %131 -%133 = OpFMul %4 %102 %132 -%134 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %134 %133 -%138 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %138 %103 -%140 = OpAccessChain %48 %17 %15 -%141 = OpLoad %5 %140 -%142 = OpLogicalNot %5 %141 -%143 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %143 %142 -%144 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %144 %104 -OpBranch %77 -%77 = OpLabel -OpControlBarrier %42 %42 %43 -%145 = OpAccessChain %114 %21 %42 -%146 = OpLoad %8 %145 -%147 = OpExtInst %8 %1 UMin %146 %13 -%148 = OpAccessChain %114 %21 %13 -%149 = OpLoad %8 %148 -%150 = OpExtInst %8 %1 UMin %149 %15 -%151 = OpAccessChain %117 %21 %46 -%152 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %147 %150 -OpStore %74 %73 -OpBranch %153 -%153 = OpLabel -OpLoopMerge %155 %166 None +%106 = OpAccessChain %57 %17 %50 +OpStore %106 %41 +%107 = OpAccessChain %27 %17 %15 +OpStore %107 %42 +OpReturnValue %43 +%104 = OpLabel +OpReturnValue %94 +OpFunctionEnd +%108 = OpFunction %2 None %63 +%109 = OpLabel +%110 = OpFunctionCall %9 %93 +OpControlBarrier %54 %54 %55 +%114 = OpCompositeExtract %8 %110 0 +%111 = OpUConvert %67 %114 +%115 = OpCompositeExtract %8 %110 1 +%112 = OpUConvert %67 %115 +%116 = OpCompositeExtract %8 %110 2 +%113 = OpUConvert %67 %116 +%117 = OpIMul %67 %111 %112 +%118 = OpIMul %67 %117 %113 +%119 = OpUGreaterThanEqual %5 %118 %68 +%120 = OpUGreaterThanEqual %5 %111 %66 +%121 = OpUGreaterThanEqual %5 %112 %66 +%122 = OpUGreaterThanEqual %5 %113 %66 +%123 = OpLogicalOr %5 %119 %120 +%124 = OpLogicalOr %5 %123 %121 +%125 = OpLogicalOr %5 %124 %122 +%126 = OpCompositeConstruct %9 %50 %50 %50 +%127 = OpSelect %9 %125 %126 %110 +%128 = OpCompositeExtract %8 %127 0 +%129 = OpCompositeExtract %8 %127 1 +%130 = OpCompositeExtract %8 %127 2 +OpEmitMeshTasksEXT %128 %129 %130 %17 +OpFunctionEnd +%150 = OpFunction %2 None %63 +%131 = OpLabel +OpBranch %160 +%160 = OpLabel +%163 = OpLoad %8 %162 +%164 = OpIEqual %5 %163 %50 +OpSelectionMerge %165 None +OpBranchConditional %164 %166 %165 +%166 = OpLabel +OpStore %19 %46 +OpStore %21 %161 OpBranch %165 %165 = OpLabel -%168 = OpLoad %8 %74 -%169 = OpULessThan %5 %168 %147 -OpBranchConditional %169 %167 %155 +OpControlBarrier %54 %54 %55 +OpBranch %167 %167 = OpLabel -%157 = OpLoad %8 %74 -%158 = OpAccessChain %119 %151 %157 %46 -%159 = OpLoad %4 %158 -%160 = OpAccessChain %161 %81 %157 %46 -OpStore %160 %159 -%162 = OpAccessChain %119 %151 %157 %15 -%163 = OpLoad %4 %162 -%164 = OpAccessChain %161 %88 %157 -OpStore %164 %163 -OpBranch %166 -%166 = OpLabel -%170 = OpLoad %8 %74 -%171 = OpIAdd %8 %170 %15 -OpStore %74 %171 -OpBranch %153 -%155 = OpLabel -OpStore %75 %73 -OpBranch %154 -%154 = OpLabel -OpLoopMerge %156 %185 None -OpBranch %184 -%184 = OpLabel -%187 = OpLoad %8 %75 -%188 = OpULessThan %5 %187 %150 -OpBranchConditional %188 %186 %156 -%186 = OpLabel -%172 = OpLoad %8 %75 -%173 = OpAccessChain %137 %152 %172 %46 -%174 = OpLoad %9 %173 -%175 = OpAccessChain %176 %91 %172 -OpStore %175 %174 -%177 = OpAccessChain %139 %152 %172 %15 -%178 = OpLoad %5 %177 -%179 = OpAccessChain %180 %85 %172 %46 -OpStore %179 %178 -%181 = OpAccessChain %119 %152 %172 %42 -%182 = OpLoad %4 %181 -%183 = OpAccessChain %161 %94 %172 +%169 = OpAccessChain %168 %21 %54 +OpStore %169 %13 +%170 = OpAccessChain %168 %21 %13 +OpStore %170 %15 +OpStore %19 %151 +%174 = OpAccessChain %173 %21 %50 %50 %50 +OpStore %174 %152 +%175 = OpAccessChain %57 %17 %50 +%176 = OpLoad %4 %175 +%177 = OpFMul %4 %152 %176 +%178 = OpAccessChain %173 %21 %50 %50 %15 +OpStore %178 %177 +%179 = OpAccessChain %173 %21 %50 %15 %50 +OpStore %179 %154 +%180 = OpAccessChain %57 %17 %50 +%181 = OpLoad %4 %180 +%182 = OpFMul %4 %155 %181 +%183 = OpAccessChain %173 %21 %50 %15 %15 OpStore %183 %182 -OpBranch %185 -%185 = OpLabel -%189 = OpLoad %8 %75 -%190 = OpIAdd %8 %189 %15 -OpStore %75 %190 -OpBranch %154 -%156 = OpLabel +%184 = OpAccessChain %173 %21 %50 %54 %50 +OpStore %184 %156 +%185 = OpAccessChain %57 %17 %50 +%186 = OpLoad %4 %185 +%187 = OpFMul %4 %157 %186 +%188 = OpAccessChain %173 %21 %50 %54 %15 +OpStore %188 %187 +%192 = OpAccessChain %191 %21 %15 %50 %50 +OpStore %192 %158 +%194 = OpFunctionCall %5 %24 +%195 = OpLogicalNot %5 %194 +%196 = OpAccessChain %193 %21 %15 %50 %15 +OpStore %196 %195 +%197 = OpAccessChain %173 %21 %15 %50 %54 +OpStore %197 %159 OpReturn OpFunctionEnd -%214 = OpFunction %2 None %25 -%191 = OpLabel -%194 = OpVariable %76 Function -%195 = OpVariable %76 Function -%193 = OpLoad %8 %192 -OpBranch %216 -%216 = OpLabel -%218 = OpLoad %9 %217 -%219 = OpIEqual %37 %218 %36 -%220 = OpAll %5 %219 -OpSelectionMerge %221 None -OpBranchConditional %220 %222 %221 -%222 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %221 -%221 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %223 -%223 = OpLabel -%224 = OpAccessChain %114 %21 %42 -OpStore %224 %13 -%225 = OpAccessChain %114 %21 %13 -OpStore %225 %15 -OpStore %19 %96 -%226 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %226 %97 -%227 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %227 %97 -%228 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %228 %99 -%229 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %229 %100 -%230 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %230 %101 -%231 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %231 %102 -%232 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %232 %103 -%233 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %233 %215 -%234 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %234 %104 -OpBranch %196 -%196 = OpLabel -OpControlBarrier %42 %42 %43 -%235 = OpAccessChain %114 %21 %42 -%236 = OpLoad %8 %235 -%237 = OpExtInst %8 %1 UMin %236 %13 -%238 = OpAccessChain %114 %21 %13 -%239 = OpLoad %8 %238 -%240 = OpExtInst %8 %1 UMin %239 %15 -%241 = OpAccessChain %117 %21 %46 -%242 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %237 %240 -OpStore %194 %193 -OpBranch %243 -%243 = OpLabel -OpLoopMerge %245 %255 None -OpBranch %254 -%254 = OpLabel -%257 = OpLoad %8 %194 -%258 = OpULessThan %5 %257 %237 -OpBranchConditional %258 %256 %245 -%256 = OpLabel -%247 = OpLoad %8 %194 -%248 = OpAccessChain %119 %241 %247 %46 -%249 = OpLoad %4 %248 -%250 = OpAccessChain %161 %200 %247 %46 -OpStore %250 %249 -%251 = OpAccessChain %119 %241 %247 %15 -%252 = OpLoad %4 %251 -%253 = OpAccessChain %161 %207 %247 -OpStore %253 %252 -OpBranch %255 -%255 = OpLabel -%259 = OpLoad %8 %194 -%260 = OpIAdd %8 %259 %15 -OpStore %194 %260 -OpBranch %243 -%245 = OpLabel -OpStore %195 %193 +%198 = OpFunction %2 None %63 +%199 = OpLabel +%200 = OpVariable %202 Function +%201 = OpVariable %202 Function +%203 = OpLoad %8 %132 +%204 = OpFunctionCall %2 %150 +OpControlBarrier %54 %54 %55 +%205 = OpAccessChain %168 %21 %54 +%206 = OpLoad %8 %205 +%207 = OpExtInst %8 %1 UMin %206 %13 +%208 = OpAccessChain %168 %21 %13 +%209 = OpLoad %8 %208 +%210 = OpExtInst %8 %1 UMin %209 %15 +%211 = OpAccessChain %171 %21 %50 +%212 = OpAccessChain %189 %21 %15 +OpSetMeshOutputsEXT %207 %210 +OpStore %200 %203 +OpBranch %213 +%213 = OpLabel +OpLoopMerge %215 %226 None +OpBranch %225 +%225 = OpLabel +%228 = OpLoad %8 %200 +%229 = OpULessThan %5 %228 %207 +OpBranchConditional %229 %227 %215 +%227 = OpLabel +%217 = OpLoad %8 %200 +%218 = OpAccessChain %173 %211 %217 %50 +%219 = OpLoad %4 %218 +%220 = OpAccessChain %221 %136 %217 %50 +OpStore %220 %219 +%222 = OpAccessChain %173 %211 %217 %15 +%223 = OpLoad %4 %222 +%224 = OpAccessChain %221 %143 %217 +OpStore %224 %223 +OpBranch %226 +%226 = OpLabel +%230 = OpLoad %8 %200 +%231 = OpIAdd %8 %230 %15 +OpStore %200 %231 +OpBranch %213 +%215 = OpLabel +OpStore %201 %203 +OpBranch %214 +%214 = OpLabel +OpLoopMerge %216 %245 None OpBranch %244 %244 = OpLabel -OpLoopMerge %246 %272 None -OpBranch %271 -%271 = OpLabel -%274 = OpLoad %8 %195 -%275 = OpULessThan %5 %274 %240 -OpBranchConditional %275 %273 %246 -%273 = OpLabel -%261 = OpLoad %8 %195 -%262 = OpAccessChain %137 %242 %261 %46 -%263 = OpLoad %9 %262 -%264 = OpAccessChain %176 %210 %261 -OpStore %264 %263 -%265 = OpAccessChain %139 %242 %261 %15 -%266 = OpLoad %5 %265 -%267 = OpAccessChain %180 %204 %261 %46 -OpStore %267 %266 -%268 = OpAccessChain %119 %242 %261 %42 -%269 = OpLoad %4 %268 -%270 = OpAccessChain %161 %213 %261 -OpStore %270 %269 -OpBranch %272 -%272 = OpLabel -%276 = OpLoad %8 %195 -%277 = OpIAdd %8 %276 %15 -OpStore %195 %277 -OpBranch %244 +%247 = OpLoad %8 %201 +%248 = OpULessThan %5 %247 %210 +OpBranchConditional %248 %246 %216 %246 = OpLabel +%232 = OpLoad %8 %201 +%233 = OpAccessChain %191 %212 %232 %50 +%234 = OpLoad %9 %233 +%235 = OpAccessChain %236 %146 %232 +OpStore %235 %234 +%237 = OpAccessChain %193 %212 %232 %15 +%238 = OpLoad %5 %237 +%239 = OpAccessChain %240 %140 %232 %50 +OpStore %239 %238 +%241 = OpAccessChain %173 %212 %232 %54 +%242 = OpLoad %4 %241 +%243 = OpAccessChain %221 %149 %232 +OpStore %243 %242 +OpBranch %245 +%245 = OpLabel +%249 = OpLoad %8 %201 +%250 = OpIAdd %8 %249 %15 +OpStore %201 %250 +OpBranch %214 +%216 = OpLabel OpReturn OpFunctionEnd -%301 = OpFunction %2 None %25 +%270 = OpFunction %2 None %63 +%251 = OpLabel +OpBranch %272 +%272 = OpLabel +%274 = OpLoad %8 %273 +%275 = OpIEqual %5 %274 %50 +OpSelectionMerge %276 None +OpBranchConditional %275 %277 %276 +%277 = OpLabel +OpStore %19 %46 +OpStore %21 %161 +OpBranch %276 +%276 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %278 %278 = OpLabel -%281 = OpVariable %76 Function -%282 = OpVariable %76 Function -%280 = OpLoad %8 %279 -OpBranch %302 -%302 = OpLabel -%304 = OpLoad %9 %303 -%305 = OpIEqual %37 %304 %36 -%306 = OpAll %5 %305 -OpSelectionMerge %307 None -OpBranchConditional %306 %308 %307 -%308 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %307 -%307 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %309 -%309 = OpLabel -%310 = OpIEqual %5 %280 %46 -OpSelectionMerge %311 None -OpBranchConditional %310 %312 %313 -%312 = OpLabel -%314 = OpAccessChain %114 %21 %42 -OpStore %314 %13 -%315 = OpAccessChain %114 %21 %13 -OpStore %315 %15 -OpStore %19 %96 -%316 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %316 %97 -%317 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %317 %97 -%318 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %318 %99 -%319 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %319 %100 -%320 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %320 %101 -%321 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %321 %102 -%322 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %322 %103 -%323 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %323 %215 -%324 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %324 %104 -OpBranch %283 -%313 = OpLabel -OpBranch %283 -%311 = OpLabel +%279 = OpAccessChain %168 %21 %54 +OpStore %279 %13 +%280 = OpAccessChain %168 %21 %13 +OpStore %280 %15 +OpStore %19 %151 +%281 = OpAccessChain %173 %21 %50 %50 %50 +OpStore %281 %152 +%282 = OpAccessChain %173 %21 %50 %50 %15 +OpStore %282 %152 +%283 = OpAccessChain %173 %21 %50 %15 %50 +OpStore %283 %154 +%284 = OpAccessChain %173 %21 %50 %15 %15 +OpStore %284 %155 +%285 = OpAccessChain %173 %21 %50 %54 %50 +OpStore %285 %156 +%286 = OpAccessChain %173 %21 %50 %54 %15 +OpStore %286 %157 +%287 = OpAccessChain %191 %21 %15 %50 %50 +OpStore %287 %158 +%288 = OpAccessChain %193 %21 %15 %50 %15 +OpStore %288 %271 +%289 = OpAccessChain %173 %21 %15 %50 %54 +OpStore %289 %159 OpReturn -%283 = OpLabel -OpControlBarrier %42 %42 %43 -%325 = OpAccessChain %114 %21 %42 -%326 = OpLoad %8 %325 -%327 = OpExtInst %8 %1 UMin %326 %13 -%328 = OpAccessChain %114 %21 %13 -%329 = OpLoad %8 %328 -%330 = OpExtInst %8 %1 UMin %329 %15 -%331 = OpAccessChain %117 %21 %46 -%332 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %327 %330 -OpStore %281 %280 +OpFunctionEnd +%290 = OpFunction %2 None %63 +%291 = OpLabel +%292 = OpVariable %202 Function +%293 = OpVariable %202 Function +%294 = OpLoad %8 %252 +%295 = OpFunctionCall %2 %270 +OpControlBarrier %54 %54 %55 +%296 = OpAccessChain %168 %21 %54 +%297 = OpLoad %8 %296 +%298 = OpExtInst %8 %1 UMin %297 %13 +%299 = OpAccessChain %168 %21 %13 +%300 = OpLoad %8 %299 +%301 = OpExtInst %8 %1 UMin %300 %15 +%302 = OpAccessChain %171 %21 %50 +%303 = OpAccessChain %189 %21 %15 +OpSetMeshOutputsEXT %298 %301 +OpStore %292 %294 +OpBranch %304 +%304 = OpLabel +OpLoopMerge %306 %316 None +OpBranch %315 +%315 = OpLabel +%318 = OpLoad %8 %292 +%319 = OpULessThan %5 %318 %298 +OpBranchConditional %319 %317 %306 +%317 = OpLabel +%308 = OpLoad %8 %292 +%309 = OpAccessChain %173 %302 %308 %50 +%310 = OpLoad %4 %309 +%311 = OpAccessChain %221 %256 %308 %50 +OpStore %311 %310 +%312 = OpAccessChain %173 %302 %308 %15 +%313 = OpLoad %4 %312 +%314 = OpAccessChain %221 %263 %308 +OpStore %314 %313 +OpBranch %316 +%316 = OpLabel +%320 = OpLoad %8 %292 +%321 = OpIAdd %8 %320 %15 +OpStore %292 %321 +OpBranch %304 +%306 = OpLabel +OpStore %293 %294 +OpBranch %305 +%305 = OpLabel +OpLoopMerge %307 %333 None +OpBranch %332 +%332 = OpLabel +%335 = OpLoad %8 %293 +%336 = OpULessThan %5 %335 %301 +OpBranchConditional %336 %334 %307 +%334 = OpLabel +%322 = OpLoad %8 %293 +%323 = OpAccessChain %191 %303 %322 %50 +%324 = OpLoad %9 %323 +%325 = OpAccessChain %236 %266 %322 +OpStore %325 %324 +%326 = OpAccessChain %193 %303 %322 %15 +%327 = OpLoad %5 %326 +%328 = OpAccessChain %240 %260 %322 %50 +OpStore %328 %327 +%329 = OpAccessChain %173 %303 %322 %54 +%330 = OpLoad %4 %329 +%331 = OpAccessChain %221 %269 %322 +OpStore %331 %330 OpBranch %333 %333 = OpLabel -OpLoopMerge %335 %345 None -OpBranch %344 -%344 = OpLabel -%347 = OpLoad %8 %281 -%348 = OpULessThan %5 %347 %327 -OpBranchConditional %348 %346 %335 -%346 = OpLabel -%337 = OpLoad %8 %281 -%338 = OpAccessChain %119 %331 %337 %46 -%339 = OpLoad %4 %338 -%340 = OpAccessChain %161 %287 %337 %46 -OpStore %340 %339 -%341 = OpAccessChain %119 %331 %337 %15 -%342 = OpLoad %4 %341 -%343 = OpAccessChain %161 %294 %337 -OpStore %343 %342 -OpBranch %345 -%345 = OpLabel -%349 = OpLoad %8 %281 -%350 = OpIAdd %8 %349 %15 -OpStore %281 %350 -OpBranch %333 -%335 = OpLabel -OpStore %282 %280 -OpBranch %334 -%334 = OpLabel -OpLoopMerge %336 %362 None +%337 = OpLoad %8 %293 +%338 = OpIAdd %8 %337 %15 +OpStore %293 %338 +OpBranch %305 +%307 = OpLabel +OpReturn +OpFunctionEnd +%360 = OpFunction %2 None %63 +%339 = OpLabel +%341 = OpLoad %9 %340 OpBranch %361 %361 = OpLabel -%364 = OpLoad %8 %282 -%365 = OpULessThan %5 %364 %330 -OpBranchConditional %365 %363 %336 -%363 = OpLabel -%351 = OpLoad %8 %282 -%352 = OpAccessChain %137 %332 %351 %46 -%353 = OpLoad %9 %352 -%354 = OpAccessChain %176 %297 %351 -OpStore %354 %353 -%355 = OpAccessChain %139 %332 %351 %15 -%356 = OpLoad %5 %355 -%357 = OpAccessChain %180 %291 %351 %46 -OpStore %357 %356 -%358 = OpAccessChain %119 %332 %351 %42 -%359 = OpLoad %4 %358 -%360 = OpAccessChain %161 %300 %351 -OpStore %360 %359 -OpBranch %362 -%362 = OpLabel -%366 = OpLoad %8 %282 -%367 = OpIAdd %8 %366 %15 -OpStore %282 %367 -OpBranch %334 -%336 = OpLabel +%363 = OpLoad %8 %362 +%364 = OpIEqual %5 %363 %50 +OpSelectionMerge %365 None +OpBranchConditional %364 %366 %365 +%366 = OpLabel +OpStore %19 %46 +OpStore %21 %161 +OpBranch %365 +%365 = OpLabel +OpControlBarrier %54 %54 %55 +OpBranch %367 +%367 = OpLabel +%368 = OpCompositeExtract %8 %341 0 +%369 = OpIEqual %5 %368 %50 +OpSelectionMerge %370 None +OpBranchConditional %369 %371 %372 +%371 = OpLabel +%373 = OpAccessChain %168 %21 %54 +OpStore %373 %13 +%374 = OpAccessChain %168 %21 %13 +OpStore %374 %15 +OpStore %19 %151 +%375 = OpAccessChain %173 %21 %50 %50 %50 +OpStore %375 %152 +%376 = OpAccessChain %173 %21 %50 %50 %15 +OpStore %376 %152 +%377 = OpAccessChain %173 %21 %50 %15 %50 +OpStore %377 %154 +%378 = OpAccessChain %173 %21 %50 %15 %15 +OpStore %378 %155 +%379 = OpAccessChain %173 %21 %50 %54 %50 +OpStore %379 %156 +%380 = OpAccessChain %173 %21 %50 %54 %15 +OpStore %380 %157 +%381 = OpAccessChain %191 %21 %15 %50 %50 +OpStore %381 %158 +%382 = OpAccessChain %193 %21 %15 %50 %15 +OpStore %382 %271 +%383 = OpAccessChain %173 %21 %15 %50 %54 +OpStore %383 %159 +OpReturn +%372 = OpLabel +OpReturn +%370 = OpLabel +OpReturn +OpFunctionEnd +%384 = OpFunction %2 None %63 +%385 = OpLabel +%386 = OpVariable %202 Function +%387 = OpVariable %202 Function +%388 = OpLoad %8 %342 +%389 = OpFunctionCall %2 %360 +OpControlBarrier %54 %54 %55 +%390 = OpAccessChain %168 %21 %54 +%391 = OpLoad %8 %390 +%392 = OpExtInst %8 %1 UMin %391 %13 +%393 = OpAccessChain %168 %21 %13 +%394 = OpLoad %8 %393 +%395 = OpExtInst %8 %1 UMin %394 %15 +%396 = OpAccessChain %171 %21 %50 +%397 = OpAccessChain %189 %21 %15 +OpSetMeshOutputsEXT %392 %395 +OpStore %386 %388 +OpBranch %398 +%398 = OpLabel +OpLoopMerge %400 %410 None +OpBranch %409 +%409 = OpLabel +%412 = OpLoad %8 %386 +%413 = OpULessThan %5 %412 %392 +OpBranchConditional %413 %411 %400 +%411 = OpLabel +%402 = OpLoad %8 %386 +%403 = OpAccessChain %173 %396 %402 %50 +%404 = OpLoad %4 %403 +%405 = OpAccessChain %221 %346 %402 %50 +OpStore %405 %404 +%406 = OpAccessChain %173 %396 %402 %15 +%407 = OpLoad %4 %406 +%408 = OpAccessChain %221 %353 %402 +OpStore %408 %407 +OpBranch %410 +%410 = OpLabel +%414 = OpLoad %8 %386 +%415 = OpIAdd %8 %414 %54 +OpStore %386 %415 +OpBranch %398 +%400 = OpLabel +OpStore %387 %388 +OpBranch %399 +%399 = OpLabel +OpLoopMerge %401 %427 None +OpBranch %426 +%426 = OpLabel +%429 = OpLoad %8 %387 +%430 = OpULessThan %5 %429 %395 +OpBranchConditional %430 %428 %401 +%428 = OpLabel +%416 = OpLoad %8 %387 +%417 = OpAccessChain %191 %397 %416 %50 +%418 = OpLoad %9 %417 +%419 = OpAccessChain %236 %356 %416 +OpStore %419 %418 +%420 = OpAccessChain %193 %397 %416 %15 +%421 = OpLoad %5 %420 +%422 = OpAccessChain %240 %350 %416 %50 +OpStore %422 %421 +%423 = OpAccessChain %173 %397 %416 %54 +%424 = OpLoad %4 %423 +%425 = OpAccessChain %221 %359 %416 +OpStore %425 %424 +OpBranch %427 +%427 = OpLabel +%431 = OpLoad %8 %387 +%432 = OpIAdd %8 %431 %54 +OpStore %387 %432 +OpBranch %399 +%401 = OpLabel OpReturn OpFunctionEnd -%379 = OpFunction %2 None %25 -%368 = OpLabel -%372 = OpLoad %4 %370 -%374 = OpLoad %4 %373 -%369 = OpCompositeConstruct %7 %372 %374 -%377 = OpLoad %4 %376 -%375 = OpCompositeConstruct %11 %377 -OpBranch %380 -%380 = OpLabel -%381 = OpCompositeExtract %4 %369 1 -%382 = OpCompositeExtract %4 %375 0 -%383 = OpFMul %4 %381 %382 -OpStore %378 %383 +%444 = OpFunction %2 None %63 +%433 = OpLabel +%437 = OpLoad %4 %435 +%439 = OpLoad %4 %438 +%434 = OpCompositeConstruct %7 %437 %439 +%442 = OpLoad %4 %441 +%440 = OpCompositeConstruct %11 %442 +OpBranch %445 +%445 = OpLabel +%446 = OpCompositeExtract %4 %434 1 +%447 = OpCompositeExtract %4 %440 0 +%448 = OpFMul %4 %446 %447 +OpStore %443 %448 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm b/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm index 45e71af552b..f32cd756b11 100644 --- a/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm +++ b/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 33 +; Bound: 29 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %11 "f" %18 +OpEntryPoint GLCompute %11 "f" %17 OpExecutionMode %11 LocalSize 1 1 1 OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 4 -OpDecorate %18 BuiltIn LocalInvocationId +OpDecorate %17 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 1 %4 = OpTypeInt 32 0 @@ -22,31 +22,27 @@ OpDecorate %18 BuiltIn LocalInvocationId %13 = OpConstant %4 2 %14 = OpConstant %4 1 %16 = OpConstantNull %4 -%17 = OpTypeVector %4 3 -%19 = OpTypePointer Input %17 -%18 = OpVariable %19 Input -%21 = OpConstantNull %17 -%22 = OpTypeVector %5 3 -%27 = OpConstant %4 264 -%30 = OpConstant %4 0 +%18 = OpTypePointer Input %4 +%17 = OpVariable %18 Input +%20 = OpConstant %4 0 +%24 = OpConstant %4 264 %11 = OpFunction %2 None %12 %10 = OpLabel OpBranch %15 %15 = OpLabel -%20 = OpLoad %17 %18 -%23 = OpIEqual %22 %20 %21 -%24 = OpAll %5 %23 -OpSelectionMerge %25 None -OpBranchConditional %24 %26 %25 -%26 = OpLabel +%19 = OpLoad %4 %17 +%21 = OpIEqual %5 %19 %20 +OpSelectionMerge %22 None +OpBranchConditional %21 %23 %22 +%23 = OpLabel OpStore %8 %16 +OpBranch %22 +%22 = OpLabel +OpControlBarrier %13 %13 %24 OpBranch %25 %25 = OpLabel -OpControlBarrier %13 %13 %27 -OpBranch %28 -%28 = OpLabel -%31 = OpAtomicCompareExchange %4 %8 %7 %30 %30 %14 %13 -%32 = OpIEqual %5 %31 %13 -%29 = OpCompositeConstruct %6 %31 %32 +%27 = OpAtomicCompareExchange %4 %8 %7 %20 %20 %14 %13 +%28 = OpIEqual %5 %27 %13 +%26 = OpCompositeConstruct %6 %27 %28 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-policy-mix.spvasm b/naga/tests/out/spv/wgsl-policy-mix.spvasm index 8d9209d7b35..23f33133493 100644 --- a/naga/tests/out/spv/wgsl-policy-mix.spvasm +++ b/naga/tests/out/spv/wgsl-policy-mix.spvasm @@ -1,13 +1,13 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 126 +; Bound: 123 OpCapability Shader OpCapability ImageQuery OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %102 "main" %115 +OpEntryPoint GLCompute %102 "main" %114 OpExecutionMode %102 LocalSize 1 1 1 %3 = OpString "policy-mix.wgsl" OpSource Unknown 0 %3 "// Tests that the index, buffer, and texture bounds checks policies are @@ -82,7 +82,7 @@ OpDecorate %26 Block OpMemberDecorate %26 0 Offset 0 OpDecorate %28 DescriptorSet 0 OpDecorate %28 Binding 2 -OpDecorate %115 BuiltIn LocalInvocationId +OpDecorate %114 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %4 = OpTypeFloat 32 %5 = OpTypeVector %4 4 @@ -147,11 +147,9 @@ OpDecorate %115 BuiltIn LocalInvocationId %110 = OpConstant %18 3 %111 = OpConstant %18 4 %113 = OpConstantNull %14 -%114 = OpTypeVector %8 3 -%116 = OpTypePointer Input %114 -%115 = OpVariable %116 Input -%118 = OpConstantNull %114 -%123 = OpConstant %8 264 +%115 = OpTypePointer Input %8 +%114 = OpVariable %115 Input +%120 = OpConstant %8 264 %39 = OpFunction %5 None %40 %36 = OpFunctionParameter %19 %37 = OpFunctionParameter %18 @@ -214,21 +212,20 @@ OpFunctionEnd %106 = OpLoad %13 %28 OpBranch %112 %112 = OpLabel -%117 = OpLoad %114 %115 -%119 = OpIEqual %74 %117 %118 -%120 = OpAll %67 %119 -OpSelectionMerge %121 None -OpBranchConditional %120 %122 %121 -%122 = OpLabel +%116 = OpLoad %8 %114 +%117 = OpIEqual %67 %116 %42 +OpSelectionMerge %118 None +OpBranchConditional %117 %119 %118 +%119 = OpLabel OpStore %30 %113 +OpBranch %118 +%118 = OpLabel +OpControlBarrier %21 %21 %120 OpBranch %121 %121 = OpLabel -OpControlBarrier %21 %21 %123 -OpBranch %124 -%124 = OpLabel OpLine %3 37 19 OpLine %3 37 19 OpLine %3 37 5 -%125 = OpFunctionCall %5 %39 %109 %110 %111 +%122 = OpFunctionCall %5 %39 %109 %110 %111 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-workgroup-uniform-load-atomic.spvasm b/naga/tests/out/spv/wgsl-workgroup-uniform-load-atomic.spvasm index 616ca534daa..2d88d2f0414 100644 --- a/naga/tests/out/spv/wgsl-workgroup-uniform-load-atomic.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-uniform-load-atomic.spvasm @@ -5,13 +5,14 @@ OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %22 "test_atomic_workgroup_uniform_load" %17 %20 +OpEntryPoint GLCompute %22 "test_atomic_workgroup_uniform_load" %17 %20 %42 OpExecutionMode %22 LocalSize 64 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 4 OpDecorate %17 BuiltIn WorkgroupId OpDecorate %20 BuiltIn LocalInvocationId +OpDecorate %42 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -44,8 +45,8 @@ OpDecorate %20 BuiltIn LocalInvocationId %39 = OpConstantNull %3 %40 = OpConstantNull %4 %41 = OpConstantNull %7 -%42 = OpConstantNull %8 -%43 = OpTypeVector %9 3 +%43 = OpTypePointer Input %3 +%42 = OpVariable %43 Input %48 = OpConstant %3 264 %57 = OpConstant %4 2 %60 = OpTypePointer Workgroup %5 @@ -58,8 +59,8 @@ OpDecorate %20 BuiltIn LocalInvocationId %21 = OpLoad %8 %20 OpBranch %38 %38 = OpLabel -%44 = OpIEqual %43 %21 %42 -%45 = OpAll %9 %44 +%44 = OpLoad %3 %42 +%45 = OpIEqual %9 %44 %28 OpSelectionMerge %46 None OpBranchConditional %45 %47 %46 %47 = OpLabel diff --git a/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm b/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm index c2d0de7a044..e82a9a70baa 100644 --- a/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 39 +; Bound: 38 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -9,7 +9,7 @@ OpEntryPoint GLCompute %14 "test_workgroupUniformLoad" %11 %19 OpExecutionMode %14 LocalSize 4 1 1 OpDecorate %5 ArrayStride 4 OpDecorate %11 BuiltIn WorkgroupId -OpDecorate %19 BuiltIn LocalInvocationId +OpDecorate %19 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -23,43 +23,42 @@ OpDecorate %19 BuiltIn LocalInvocationId %15 = OpTypeFunction %2 %16 = OpConstant %4 10 %18 = OpConstantNull %5 -%19 = OpVariable %12 Input -%21 = OpConstantNull %7 -%23 = OpTypeBool -%22 = OpTypeVector %23 3 -%28 = OpConstant %3 2 -%29 = OpConstant %3 264 -%32 = OpTypePointer Workgroup %4 +%20 = OpTypePointer Input %3 +%19 = OpVariable %20 Input +%22 = OpConstant %3 0 +%24 = OpTypeBool +%27 = OpConstant %3 2 +%28 = OpConstant %3 264 +%31 = OpTypePointer Workgroup %4 %14 = OpFunction %2 None %15 %10 = OpLabel %13 = OpLoad %7 %11 OpBranch %17 %17 = OpLabel -%20 = OpLoad %7 %19 -%24 = OpIEqual %22 %20 %21 -%25 = OpAll %23 %24 -OpSelectionMerge %26 None -OpBranchConditional %25 %27 %26 -%27 = OpLabel -OpStore %8 %18 -OpBranch %26 +%21 = OpLoad %3 %19 +%23 = OpIEqual %24 %21 %22 +OpSelectionMerge %25 None +OpBranchConditional %23 %26 %25 %26 = OpLabel -OpControlBarrier %28 %28 %29 -OpBranch %30 -%30 = OpLabel -%31 = OpCompositeExtract %3 %13 0 -OpControlBarrier %28 %28 %29 -%33 = OpAccessChain %32 %8 %31 -%34 = OpLoad %4 %33 -OpControlBarrier %28 %28 %29 -%35 = OpSGreaterThan %23 %34 %16 -OpSelectionMerge %36 None -OpBranchConditional %35 %37 %38 -%37 = OpLabel -OpControlBarrier %28 %28 %29 +OpStore %8 %18 +OpBranch %25 +%25 = OpLabel +OpControlBarrier %27 %27 %28 +OpBranch %29 +%29 = OpLabel +%30 = OpCompositeExtract %3 %13 0 +OpControlBarrier %27 %27 %28 +%32 = OpAccessChain %31 %8 %30 +%33 = OpLoad %4 %32 +OpControlBarrier %27 %27 %28 +%34 = OpSGreaterThan %24 %33 %16 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %37 +%36 = OpLabel +OpControlBarrier %27 %27 %28 OpReturn -%38 = OpLabel +%37 = OpLabel OpReturn -%36 = OpLabel +%35 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm b/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm index cb214f3e357..801ad15e942 100644 --- a/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 42 +; Bound: 38 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %18 "main" %26 +OpEntryPoint GLCompute %18 "main" %25 OpExecutionMode %18 LocalSize 1 1 1 %3 = OpString "workgroup-var-init.wgsl" OpSource Unknown 0 %3 "struct WStruct { @@ -41,7 +41,7 @@ OpDecorate %14 DescriptorSet 0 OpDecorate %14 Binding 0 OpDecorate %15 Block OpMemberDecorate %15 0 Offset 0 -OpDecorate %26 BuiltIn LocalInvocationId +OpDecorate %25 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %4 = OpTypeInt 32 0 %6 = OpConstant %4 512 @@ -60,36 +60,32 @@ OpDecorate %26 BuiltIn LocalInvocationId %20 = OpTypePointer StorageBuffer %5 %21 = OpConstant %4 0 %24 = OpConstantNull %11 -%25 = OpTypeVector %4 3 -%27 = OpTypePointer Input %25 -%26 = OpVariable %27 Input -%29 = OpConstantNull %25 -%31 = OpTypeBool -%30 = OpTypeVector %31 3 -%36 = OpConstant %4 2 -%37 = OpConstant %4 264 -%39 = OpTypePointer Workgroup %5 +%26 = OpTypePointer Input %4 +%25 = OpVariable %26 Input +%29 = OpTypeBool +%32 = OpConstant %4 2 +%33 = OpConstant %4 264 +%35 = OpTypePointer Workgroup %5 %18 = OpFunction %2 None %19 %17 = OpLabel %22 = OpAccessChain %20 %14 %21 OpBranch %23 %23 = OpLabel -%28 = OpLoad %25 %26 -%32 = OpIEqual %30 %28 %29 -%33 = OpAll %31 %32 -OpSelectionMerge %34 None -OpBranchConditional %33 %35 %34 -%35 = OpLabel +%27 = OpLoad %4 %25 +%28 = OpIEqual %29 %27 %21 +OpSelectionMerge %30 None +OpBranchConditional %28 %31 %30 +%31 = OpLabel OpStore %12 %24 +OpBranch %30 +%30 = OpLabel +OpControlBarrier %32 %32 %33 OpBranch %34 %34 = OpLabel -OpControlBarrier %36 %36 %37 -OpBranch %38 -%38 = OpLabel OpLine %3 14 14 -%40 = OpAccessChain %39 %12 %21 -%41 = OpLoad %5 %40 +%36 = OpAccessChain %35 %12 %21 +%37 = OpLoad %5 %36 OpLine %3 14 5 -OpStore %22 %41 +OpStore %22 %37 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl index c5e853af26e..4920dbd865b 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl index fe7c341f303..85fb355995b 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl index b6eea73d08a..0407e21c675 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl index 974027fdbb4..93372453ce5 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl @@ -31,17 +31,29 @@ var taskPayload: TaskPayload; var workgroupData: f32; var mesh_output: MeshOutput; +fn helper_reader() -> bool { + let _e2 = taskPayload.visible; + return _e2; +} + +fn helper_writer(value: bool) { + taskPayload.visible = value; + return; +} + @task @payload(taskPayload) @workgroup_size(1, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { workgroupData = 1f; taskPayload.colorMask = vec4(1f, 1f, 0f, 1f); - taskPayload.visible = true; + helper_writer(true); + let _e12 = helper_reader(); + taskPayload.visible = _e12; return vec3(1u, 1u, 1u); } @task @payload(taskPayload) @workgroup_size(2, 1, 1) -fn ts_divergent(@builtin(local_invocation_index) thread_id: u32) -> @builtin(mesh_task_size) vec3 { - if (thread_id == 0u) { +fn ts_divergent(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if (thread_id.x == 0u) { taskPayload.colorMask = vec4(1f, 1f, 0f, 1f); taskPayload.visible = true; return vec3(1u, 1u, 1u); @@ -64,8 +76,8 @@ fn ms_main() { let _e67 = taskPayload.colorMask; mesh_output.vertices[2].color = (vec4(1f, 0f, 0f, 1f) * _e67); mesh_output.primitives[0].indices = vec3(0u, 1u, 2u); - let _e88 = taskPayload.visible; - mesh_output.primitives[0].cull = !(_e88); + let _e86 = helper_reader(); + mesh_output.primitives[0].cull = !(_e86); mesh_output.primitives[0].colorMask = vec4(1f, 0f, 1f, 1f); return; } @@ -87,9 +99,9 @@ fn ms_no_ts() { return; } -@mesh(mesh_output) @workgroup_size(1, 1, 1) -fn ms_divergent(@builtin(local_invocation_index) thread_id_1: u32) { - if (thread_id_1 == 0u) { +@mesh(mesh_output) @workgroup_size(2, 1, 1) +fn ms_divergent(@builtin(local_invocation_id) thread_id_1: vec3) { + if (thread_id_1.x == 0u) { mesh_output.vertex_count = 3u; mesh_output.primitive_count = 1u; workgroupData = 2f; diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs index 44728584810..3ac912c9f85 100644 --- a/naga/xtask/src/validate.rs +++ b/naga/xtask/src/validate.rs @@ -327,8 +327,13 @@ fn push_job_for_each_hlsl_config_item( vertex, fragment, compute, + task, + mesh, } = hlsl_snapshots::Config::from_path(path.with_extension("ron"))?; - for shader in [vertex, fragment, compute].into_iter().flatten() { + for shader in [vertex, fragment, compute, task, mesh] + .into_iter() + .flatten() + { // Let each job closure stand on its own. let mut validator = validator.clone(); let path = path.to_owned(); diff --git a/tests/src/run.rs b/tests/src/run.rs index e066371d2e0..ceeb8aca0ba 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -12,7 +12,6 @@ use crate::{ GpuTestConfiguration, }; -#[derive(Hash)] /// Parameters and resources handed to the test function. pub struct TestingContext { pub instance: Instance, diff --git a/tests/tests/wgpu-gpu/mesh_shader/basic.hlsl b/tests/tests/wgpu-gpu/mesh_shader/basic.hlsl deleted file mode 100644 index dc587df737d..00000000000 --- a/tests/tests/wgpu-gpu/mesh_shader/basic.hlsl +++ /dev/null @@ -1,63 +0,0 @@ -struct OutVertex -{ - float4 Position : SV_POSITION; - float4 Color : COLOR; -}; -struct InVertex -{ - float4 Color : COLOR; -}; - -static const float4 positions[3] = { float4(0., 1.0, 0., 1.0), float4(-1.0, -1.0, 0., 1.0), float4(1.0, -1.0, 0., 1.0) }; -static const float4 colors[3] = { float4(0., 1., 0., 1.), float4(0., 0., 1., 1.), float4(1., 0., 0., 1.) }; - -struct EmptyPayload -{ - uint _nullField; -}; -groupshared EmptyPayload _emptyPayload; - -[numthreads(1, 1, 1)] -void Task() -{ - DispatchMesh(1, 1, 1, _emptyPayload); -} - -[outputtopology("triangle")] -[numthreads(1, 1, 1)] -void Mesh(out indices uint3 triangles[1], out vertices OutVertex vertices[3], in payload EmptyPayload _emptyPayload) -{ - SetMeshOutputCounts(3, 1); - - vertices[0].Position = positions[0]; - vertices[1].Position = positions[1]; - vertices[2].Position = positions[2]; - - vertices[0].Color = colors[0]; - vertices[1].Color = colors[1]; - vertices[2].Color = colors[2]; - - triangles[0] = uint3(0, 1, 2); -} - -[outputtopology("triangle")] -[numthreads(1, 1, 1)] -void MeshNoTask(out indices uint3 triangles[1], out vertices OutVertex vertices[3]) -{ - SetMeshOutputCounts(3, 1); - - vertices[0].Position = positions[0]; - vertices[1].Position = positions[1]; - vertices[2].Position = positions[2]; - - vertices[0].Color = colors[0]; - vertices[1].Color = colors[1]; - vertices[2].Color = colors[2]; - - triangles[0] = uint3(0, 1, 2); -} - -float4 Frag(InVertex vertex) : SV_Target -{ - return vertex.Color; -} diff --git a/tests/tests/wgpu-gpu/mesh_shader/mod.rs b/tests/tests/wgpu-gpu/mesh_shader/mod.rs index f188e742db8..b53460d6863 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/mod.rs +++ b/tests/tests/wgpu-gpu/mesh_shader/mod.rs @@ -1,5 +1,3 @@ -use std::hash::{DefaultHasher, Hash, Hasher}; - use wgpu::util::DeviceExt; use wgpu_test::{ gpu_test, GpuTestConfiguration, GpuTestInitializer, TestParameters, TestingContext, @@ -29,47 +27,6 @@ fn compile_wgsl(device: &wgpu::Device) -> wgpu::ShaderModule { source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), }) } -fn compile_hlsl( - device: &wgpu::Device, - entry: &str, - stage_str: &str, - test_name: &str, -) -> wgpu::ShaderModule { - // Each test needs its own files - let out_path = format!( - "{}/tests/wgpu-gpu/mesh_shader/{test_name}.{stage_str}.cso", - env!("CARGO_MANIFEST_DIR") - ); - let cmd = std::process::Command::new("dxc") - .args([ - "-T", - &format!("{stage_str}_6_5"), - "-E", - entry, - &format!( - "{}/tests/wgpu-gpu/mesh_shader/basic.hlsl", - env!("CARGO_MANIFEST_DIR") - ), - "-Fo", - &out_path, - ]) - .output() - .unwrap(); - if !cmd.status.success() { - panic!("DXC failed:\n{}", String::from_utf8(cmd.stderr).unwrap()); - } - let file = std::fs::read(&out_path).unwrap(); - std::fs::remove_file(out_path).unwrap(); - unsafe { - device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough { - entry_point: entry.to_owned(), - label: None, - num_workgroups: (1, 1, 1), - dxil: Some(std::borrow::Cow::Owned(file)), - ..Default::default() - }) - } -} fn compile_msl(device: &wgpu::Device, entry: &str) -> wgpu::ShaderModule { unsafe { @@ -86,7 +43,6 @@ fn compile_msl(device: &wgpu::Device, entry: &str) -> wgpu::ShaderModule { fn get_shaders( device: &wgpu::Device, backend: wgpu::Backend, - test_name: &str, info: &MeshPipelineTestInfo, ) -> ( Option, @@ -102,7 +58,7 @@ fn get_shaders( // In the case that the platform does support mesh shaders, the dummy // shader is used to avoid requiring EXPERIMENTAL_PASSTHROUGH_SHADERS. match backend { - wgpu::Backend::Vulkan => ( + wgpu::Backend::Vulkan | wgpu::Backend::Dx12 => ( info.use_task.then(|| compile_wgsl(device)), compile_wgsl(device), info.use_frag.then(|| compile_wgsl(device)), @@ -116,21 +72,6 @@ fn get_shaders( }, "fs_main", ), - wgpu::Backend::Dx12 => ( - info.use_task - .then(|| compile_hlsl(device, "Task", "as", test_name)), - compile_hlsl( - device, - if info.use_task { "Mesh" } else { "MeshNoTask" }, - "ms", - test_name, - ), - info.use_frag - .then(|| compile_hlsl(device, "Frag", "ps", test_name)), - "main", - "main", - "main", - ), wgpu::Backend::Metal => ( info.use_task.then(|| compile_msl(device, "taskShader")), compile_msl( @@ -186,20 +127,12 @@ struct MeshPipelineTestInfo { divergent: bool, } -fn hash_testing_context(ctx: &TestingContext) -> u64 { - let mut hasher = DefaultHasher::new(); - ctx.hash(&mut hasher); - hasher.finish() -} - fn mesh_pipeline_build(ctx: &TestingContext, info: MeshPipelineTestInfo) { let backend = ctx.adapter.get_info().backend; let device = &ctx.device; let (_depth_image, depth_view, depth_state) = create_depth(device); - let test_hash = hash_testing_context(ctx).to_string(); - let (task, mesh, frag, ts_name, ms_name, fs_name) = - get_shaders(device, backend, &test_hash, &info); + let (task, mesh, frag, ts_name, ms_name, fs_name) = get_shaders(device, backend, &info); let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], @@ -275,10 +208,8 @@ fn mesh_draw(ctx: &TestingContext, draw_type: DrawType, info: MeshPipelineTestIn let backend = ctx.adapter.get_info().backend; let device = &ctx.device; let (_depth_image, depth_view, depth_state) = create_depth(device); - let test_hash = hash_testing_context(ctx).to_string(); - let (task, mesh, frag, ts_name, ms_name, fs_name) = - get_shaders(device, backend, &test_hash, &info); + let (task, mesh, frag, ts_name, ms_name, fs_name) = get_shaders(device, backend, &info); let frag = frag.unwrap(); let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, diff --git a/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl b/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl index 13f6c225e46..8a8d6de1195 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl +++ b/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl @@ -96,9 +96,9 @@ fn ms_no_ts() { @mesh(mesh_output) @workgroup_size(2) -fn ms_divergent(@builtin(local_invocation_index) index: u32) { +fn ms_divergent(@builtin(local_invocation_id) thread_id: vec3) { // Workgroup with 2 threads. They return at different points. - if index == 0 { + if thread_id.x == 0 { mesh_output.vertex_count = 3; mesh_output.primitive_count = 1; workgroupData = 2.0; diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 3c3aa8af33b..5077ed5a0d7 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -679,6 +679,7 @@ impl super::Adapter { } .is_ok() && features7.MeshShaderTier != Direct3D12::D3D12_MESH_SHADER_TIER_NOT_SUPPORTED + && shader_model >= naga::back::hlsl::ShaderModel::V6_5 }; features.set( wgt::Features::EXPERIMENTAL_MESH_SHADER, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 26ee47707ba..52f165dca25 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -228,6 +228,7 @@ impl super::Device { compiler_container, shader_cache: Default::default(), counters: Default::default(), + limits: limits.clone(), }) } @@ -290,6 +291,7 @@ impl super::Device { let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing + || !stage.module.runtime_checks.task_shader_dispatch_tracking || stage.module.runtime_checks.force_loop_bounding != layout.naga_options.force_loop_bounding; // Note: ray query initialization tracking not yet implemented @@ -299,6 +301,9 @@ impl super::Device { temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; temp_options.restrict_indexing = stage.module.runtime_checks.bounds_checks; temp_options.force_loop_bounding = stage.module.runtime_checks.force_loop_bounding; + if !stage.module.runtime_checks.task_shader_dispatch_tracking { + temp_options.task_runtime_limits = None; + } &temp_options } else { &layout.naga_options @@ -1488,6 +1493,10 @@ impl crate::Device for super::Device { sampler_buffer_binding_map, external_texture_binding_map, force_loop_bounding: true, + task_runtime_limits: Some(naga::back::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: self.limits.max_task_mesh_workgroups_per_dimension, + max_mesh_workgroups_total: self.limits.max_task_mesh_workgroup_total_count, + }), }, }) } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 51f24eb9fc2..7316ac90f1d 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -712,6 +712,7 @@ pub struct Device { compiler_container: Arc, shader_cache: Mutex, counters: Arc, + limits: wgt::Limits, } impl Drop for Device { diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 796f660e34d..23e512ae623 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -2177,6 +2177,7 @@ impl super::Adapter { drop_callback: Option, enabled_extensions: &[&'static CStr], features: wgt::Features, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, family_index: u32, queue_index: u32, @@ -2437,6 +2438,10 @@ impl super::Adapter { // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None, + task_runtime_limits: Some(naga::back::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: limits.max_task_mesh_workgroups_per_dimension, + max_mesh_workgroups_total: limits.max_task_mesh_workgroup_total_count, + }), } }; @@ -2549,6 +2554,7 @@ impl super::Adapter { pub unsafe fn open_with_callback<'a>( &self, features: wgt::Features, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, callback: Option>>, ) -> Result, crate::DeviceError> { @@ -2611,6 +2617,7 @@ impl super::Adapter { None, &enabled_extensions, features, + limits, memory_hints, family_info.queue_family_index, 0, @@ -2625,10 +2632,10 @@ impl crate::Adapter for super::Adapter { unsafe fn open( &self, features: wgt::Features, - _limits: &wgt::Limits, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { - unsafe { self.open_with_callback(features, memory_hints, None) } + unsafe { self.open_with_callback(features, limits, memory_hints, None) } } unsafe fn texture_format_capabilities( diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index f804a208fb8..d489ceead6c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -653,7 +653,8 @@ impl super::Device { || !runtime_checks.ray_query_initialization_tracking || !binding_map.is_empty() || naga_shader.debug_source.is_some() - || !stage.zero_initialize_workgroup_memory; + || !stage.zero_initialize_workgroup_memory + || !runtime_checks.task_shader_dispatch_tracking; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -686,6 +687,9 @@ impl super::Device { temp_options.zero_initialize_workgroup_memory = naga::back::spv::ZeroInitializeWorkgroupMemoryMode::None; } + if !runtime_checks.task_shader_dispatch_tracking { + temp_options.task_runtime_limits = None; + } &temp_options } else { diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs index 99e173a5aeb..97a3ebe11ca 100644 --- a/wgpu-types/src/features.rs +++ b/wgpu-types/src/features.rs @@ -1153,6 +1153,10 @@ bitflags_array! { /// /// Naga is only supported on vulkan. On other platforms you will have to use passthrough shaders. /// + /// It is recommended to use [`Device::create_shader_module_trusted`] with [`ShaderRuntimeChecks::unchecked()`] + /// to avoid workgroup memory zero initialization, which can be expensive due to zero initialization being + /// single-threaded currently. + /// /// Some Mesa drivers including LLVMPIPE but not RADV fail to run the naga generated code. /// [This may be our bug and will be investigated.](https://github.com/gfx-rs/wgpu/issues/8727) /// However, due to the nature of the failure, the fact that it is unique, and the random changes @@ -1160,6 +1164,9 @@ bitflags_array! { /// [this Mesa issue.](https://gitlab.freedesktop.org/mesa/mesa/-/issues/14376) /// /// This is a native only feature. + /// + /// [`Device::create_shader_module_trusted`]: https://docs.rs/wgpu/latest/wgpu/struct.Device.html#method.create_shader_module_trusted + /// [`ShaderRuntimeChecks::unchecked()`]: crate::ShaderRuntimeChecks::unchecked const EXPERIMENTAL_MESH_SHADER = 1 << 48; /// ***THIS IS EXPERIMENTAL:*** Features enabled by this may have diff --git a/wgpu-types/src/shader.rs b/wgpu-types/src/shader.rs index f34ec963b0e..0bb76edfbc5 100644 --- a/wgpu-types/src/shader.rs +++ b/wgpu-types/src/shader.rs @@ -43,6 +43,9 @@ pub struct ShaderRuntimeChecks { /// /// It is the aim that these cases will not cause UB if this is set to true, but currently this will still happen on DX12 and Metal. pub ray_query_initialization_tracking: bool, + + /// If false, task shaders will not validate that the mesh shader grid they dispatch is within legal limits. + pub task_shader_dispatch_tracking: bool, } impl ShaderRuntimeChecks { @@ -76,6 +79,7 @@ impl ShaderRuntimeChecks { bounds_checks: all_checks, force_loop_bounding: all_checks, ray_query_initialization_tracking: all_checks, + task_shader_dispatch_tracking: all_checks, } } }