diff --git a/Cargo.toml b/Cargo.toml index 03c104ca..681d4619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ gltf = { version = "1.1", default-features = false } log = "0.4" mint = "0.5" naga = { version = "22", features = ["wgsl-in"] } +nanorand = { version = "0.7", default-features = false } profiling = "1" slab = "0.4" strum = { version = "0.25", features = ["derive"] } @@ -79,7 +80,7 @@ glam = { workspace = true } log = { workspace = true } mint = { workspace = true, features = ["serde"] } naga = { workspace = true } -nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } +nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } ron = "0.8" serde = { version = "1", features = ["serde_derive"] } @@ -95,7 +96,6 @@ egui-winit = "0.28" console_error_panic_hook = "0.1.7" console_log = "1" web-sys = { workspace = true, features = ["Window"] } -getrandom = { version = "0.2", features = ["js"] } [target.'cfg(any(target_os = "windows", target_os = "linux"))'.dev-dependencies] renderdoc = "0.12" diff --git a/blade-graphics/src/util.rs b/blade-graphics/src/util.rs index 23e876e0..f0d2013e 100644 --- a/blade-graphics/src/util.rs +++ b/blade-graphics/src/util.rs @@ -94,14 +94,19 @@ impl super::TextureFormat { } } +impl super::Extent { + pub fn group_by(&self, size: [u32; 3]) -> [u32; 3] { + [ + (self.width + size[0] - 1) / size[0], + (self.height + size[1] - 1) / size[1], + (self.depth + size[2] - 1) / size[2], + ] + } +} + impl super::ComputePipeline { /// Return the dispatch group counts sufficient to cover the given extent. pub fn get_dispatch_for(&self, extent: super::Extent) -> [u32; 3] { - let wg_size = self.get_workgroup_size(); - [ - (extent.width + wg_size[0] - 1) / wg_size[0], - (extent.height + wg_size[1] - 1) / wg_size[1], - (extent.depth + wg_size[2] - 1) / wg_size[2], - ] + extent.group_by(self.get_workgroup_size()) } } diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index a135a337..ff4cf5a5 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -22,9 +22,10 @@ impl ExposeHud for blade_render::RayConfig { egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) .text("Spatial tap history"), ); + ui.add(egui::widgets::Slider::new(&mut self.group_mixer, 1..=10).text("Group mixer")); ui.add( - egui::widgets::Slider::new(&mut self.spatial_radius, 1..=50) - .text("Spatial radius (px)"), + egui::widgets::Slider::new(&mut self.spatial_min_distance, 1..=10) + .text("Spatial minimum distance (px)"), ); ui.add( egui::widgets::Slider::new(&mut self.t_start, 0.001..=0.5) diff --git a/blade-render/code/blur.wgsl b/blade-render/code/blur.wgsl index 3207ef60..3aec665b 100644 --- a/blade-render/code/blur.wgsl +++ b/blade-render/code/blur.wgsl @@ -1,5 +1,5 @@ #include "camera.inc.wgsl" -#include "gbuf.inc.wgsl" +#include "motion.inc.wgsl" #include "quaternion.inc.wgsl" #include "surface.inc.wgsl" diff --git a/blade-render/code/color.inc.wgsl b/blade-render/code/color.inc.wgsl new file mode 100644 index 00000000..8e84cfda --- /dev/null +++ b/blade-render/code/color.inc.wgsl @@ -0,0 +1,19 @@ +fn hsv_to_rgb(h: f32, s: f32, v: f32) -> vec3 { + let c = v * s; + let x = c * (1.0 - abs((h / 60.0) % 2.0 - 1.0)); + var q = vec3(v - c); + if (h < 60.0) { + q.r += c; q.g += x; + } else if (h < 120.0) { + q.g += c; q.r += x; + } else if (h < 180.0) { + q.g += c; q.b += x; + } else if (h < 240.0) { + q.b += c; q.g += x; + } else if (h < 300.0) { + q.b += c; q.r += x; + } else { + q.r += c; q.b += x; + } + return q; +} diff --git a/blade-render/code/fill-gbuf.wgsl b/blade-render/code/fill-gbuf.wgsl deleted file mode 100644 index 346edf51..00000000 --- a/blade-render/code/fill-gbuf.wgsl +++ /dev/null @@ -1,204 +0,0 @@ -#include "quaternion.inc.wgsl" -#include "camera.inc.wgsl" -#include "debug.inc.wgsl" -#include "debug-param.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: use proper WGSL -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; - -// Has to match the host! -struct Vertex { - pos: vec3, - bitangent_sign: f32, - tex_coords: vec2, - normal: u32, - tangent: u32, -} -struct VertexBuffer { - data: array, -} -struct IndexBuffer { - data: array, -} -var vertex_buffers: binding_array; -var index_buffers: binding_array; -var textures: binding_array>; -var sampler_linear: sampler; -var sampler_nearest: sampler; - -struct HitEntry { - index_buf: u32, - vertex_buf: u32, - winding: f32, - // packed quaternion - geometry_to_world_rotation: u32, - geometry_to_object: mat4x3, - prev_object_to_world: mat4x3, - base_color_texture: u32, - // packed color factor - base_color_factor: u32, - normal_texture: u32, -} -var hit_entries: array; - -var camera: CameraParams; -var prev_camera: CameraParams; -var debug: DebugParams; -var acc_struct: acceleration_structure; - -var out_depth: texture_storage_2d; -var out_flat_normal: texture_storage_2d; -var out_basis: texture_storage_2d; -var out_albedo: texture_storage_2d; -var out_motion: texture_storage_2d; -var out_debug: texture_storage_2d; - -fn decode_normal(raw: u32) -> vec3 { - return unpack4x8snorm(raw).xyz; -} - -fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { - let nw = normalize(qrot(rotation, decode_normal(normal_raw))); - debug_line(pos, pos + debug_len * nw, color); -} - -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { - if (any(global_id.xy >= camera.target_size)) { - return; - } - - var rq: ray_query; - let ray_dir = get_ray_direction(camera, vec2(global_id.xy)); - rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); - rayQueryProceed(&rq); - let intersection = rayQueryGetCommittedIntersection(&rq); - - var depth = 0.0; - var basis = vec4(0.0); - var flat_normal = vec3(0.0); - var albedo = vec3(1.0); - var motion = vec2(0.0); - let enable_debug = all(global_id.xy == debug.mouse_pos); - - if (intersection.kind != RAY_QUERY_INTERSECTION_NONE) { - let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; - depth = intersection.t; - - var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); - if (entry.index_buf != ~0u) { - let iptr = &index_buffers[entry.index_buf].data; - indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); - } - - let vptr = &vertex_buffers[entry.vertex_buf].data; - let vertices = array( - (*vptr)[indices.x], - (*vptr)[indices.y], - (*vptr)[indices.z], - ); - - let positions_object = entry.geometry_to_object * mat3x4( - vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) - ); - let positions = intersection.object_to_world * mat3x4( - vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) - ); - flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); - - let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); - let position_object = vec4(positions_object * barycentrics, 1.0); - let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; - let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); - let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); - let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; - - let lod = 0.0; //TODO: this is actually complicated - - let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); - let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); - var normal_local: vec3; - if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { - normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map - } else { - let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; - normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); - } - var normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); - basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); - - let hit_position = camera.position + intersection.t * ray_dir; - if (enable_debug) { - debug_buf.entry.custom_index = intersection.instance_custom_index; - debug_buf.entry.depth = intersection.t; - debug_buf.entry.tex_coords = tex_coords; - debug_buf.entry.base_color_texture = entry.base_color_texture; - debug_buf.entry.normal_texture = entry.normal_texture; - debug_buf.entry.position = hit_position; - debug_buf.entry.flat_normal = flat_normal; - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { - let normal_len = 0.15 * intersection.t; - let side = 0.05 * intersection.t; - debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); - debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); - debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { - let debug_len = intersection.t * 0.2; - debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); - debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); - debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); - let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; - debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); - // note: dynamic indexing into positions isn't allowed by WGSL yet - debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - // draw tangent space - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); - } - - let base_color_factor = unpack4x8unorm(entry.base_color_factor); - if ((debug.texture_flags & DebugTextureFlags_ALBEDO) != 0u) { - albedo = base_color_factor.xyz; - } else { - let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); - albedo = (base_color_factor * base_color_sample).xyz; - } - - if (debug.view_mode == DebugMode_HitConsistency) { - let reprojected = get_projected_pixel(camera, hit_position); - let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; - let camera_projection_diff = vec2(global_id.xy) - vec2(reprojected); - let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); - textureStore(out_debug, global_id.xy, consistency); - } - - let prev_position = (entry.prev_object_to_world * position_object).xyz; - let prev_screen = get_projected_pixel_float(prev_camera, prev_position); - //TODO: consider just storing integers here? - //TODO: technically this "0.5" is just a waste compute on both packing and unpacking - motion = prev_screen - vec2(global_id.xy) - 0.5; - if (debug.view_mode == DebugMode_Motion) { - textureStore(out_debug, global_id.xy, vec4(motion * MOTION_SCALE + vec2(0.5), 0.0, 1.0)); - } - } else { - if (enable_debug) { - debug_buf.entry = DebugEntry(); - } - if (debug.view_mode != DebugMode_Final) { - textureStore(out_debug, global_id.xy, vec4(0.0)); - } - } - - // TODO: option to avoid writing data for the sky - textureStore(out_depth, global_id.xy, vec4(depth, 0.0, 0.0, 0.0)); - textureStore(out_basis, global_id.xy, basis); - textureStore(out_flat_normal, global_id.xy, vec4(flat_normal, 0.0)); - textureStore(out_albedo, global_id.xy, vec4(albedo, 0.0)); - textureStore(out_motion, global_id.xy, vec4(motion * MOTION_SCALE, 0.0, 0.0)); -} diff --git a/blade-render/code/gbuf.inc.wgsl b/blade-render/code/gbuf.inc.wgsl deleted file mode 100644 index ecb4642d..00000000 --- a/blade-render/code/gbuf.inc.wgsl +++ /dev/null @@ -1,2 +0,0 @@ -const MOTION_SCALE: f32 = 0.02; -const USE_MOTION_VECTORS: bool = true; \ No newline at end of file diff --git a/blade-render/code/geometry.inc.wgsl b/blade-render/code/geometry.inc.wgsl new file mode 100644 index 00000000..99b771b1 --- /dev/null +++ b/blade-render/code/geometry.inc.wgsl @@ -0,0 +1,182 @@ +//TODO: https://github.com/gfx-rs/wgpu/pull/5429 +const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; + +// Has to match the host! +struct Vertex { + pos: vec3, + bitangent_sign: f32, + tex_coords: vec2, + normal: u32, + tangent: u32, +} +struct VertexBuffer { + data: array, +} +struct IndexBuffer { + data: array, +} +var vertex_buffers: binding_array; +var index_buffers: binding_array; +var textures: binding_array>; + +struct HitEntry { + index_buf: u32, + vertex_buf: u32, + winding: f32, + // packed quaternion + geometry_to_world_rotation: u32, + geometry_to_object: mat4x3, + prev_object_to_world: mat4x3, + base_color_texture: u32, + // packed color factor + base_color_factor: u32, + normal_texture: u32, +} +var hit_entries: array; + +fn decode_normal(raw: u32) -> vec3 { + return unpack4x8snorm(raw).xyz; +} + +fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { + let nw = normalize(qrot(rotation, decode_normal(normal_raw))); + debug_line(pos, pos + debug_len * nw, color); +} + +struct RichSurface { + inner: Surface, + position: vec3, + albedo: vec3, + motion: vec2, +} + +fn fetch_geometry(pixel_coord: vec2, enable_debug: bool, is_primary: bool) -> RichSurface { + var rq: ray_query; + let ray_dir = get_ray_direction(camera, pixel_coord); + rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); + rayQueryProceed(&rq); + let intersection = rayQueryGetCommittedIntersection(&rq); + + var rs = RichSurface(); + rs.albedo = vec3(1.0); + + if (intersection.kind == RAY_QUERY_INTERSECTION_NONE) { + if (enable_debug) { + debug_buf.entry = DebugEntry(); + } + return rs; + } + + let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; + + var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); + if (entry.index_buf != ~0u) { + let iptr = &index_buffers[entry.index_buf].data; + indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); + } + + let vptr = &vertex_buffers[entry.vertex_buf].data; + let vertices = array( + (*vptr)[indices.x], + (*vptr)[indices.y], + (*vptr)[indices.z], + ); + + let positions_object = entry.geometry_to_object * mat3x4( + vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) + ); + let positions = intersection.object_to_world * mat3x4( + vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) + ); + let flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); + + let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); + let position_object = vec4(positions_object * barycentrics, 1.0); + let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; + let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); + let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); + let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; + + let lod = 0.0; //TODO: this is actually complicated + + let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); + let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); + var normal_local: vec3; + if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { + normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map + } else { + let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; + normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); + } + let normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); + let basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); + + let hit_position = camera.position + intersection.t * ray_dir; + if (enable_debug && is_primary) { + debug_buf.entry.custom_index = intersection.instance_custom_index; + debug_buf.entry.depth = intersection.t; + debug_buf.entry.tex_coords = tex_coords; + debug_buf.entry.base_color_texture = entry.base_color_texture; + debug_buf.entry.normal_texture = entry.normal_texture; + debug_buf.entry.position = hit_position; + debug_buf.entry.flat_normal = flat_normal; + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { + let normal_len = 0.15 * intersection.t; + let side = 0.05 * intersection.t; + debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); + debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); + debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { + let debug_len = intersection.t * 0.2; + debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); + debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); + debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); + let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; + debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); + // note: dynamic indexing into positions isn't allowed by WGSL yet + debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + // draw tangent space + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); + } + + rs.albedo = unpack4x8unorm(entry.base_color_factor).xyz; + if ((debug.texture_flags & DebugTextureFlags_ALBEDO) == 0u) { + let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); + rs.albedo *= base_color_sample.xyz; + } + + if (is_primary) { + if (debug.view_mode == DebugMode_Depth) { + textureStore(out_debug, pixel_coord, vec4(intersection.t / camera.depth)); + } + if (debug.view_mode == DebugMode_Normal) { + textureStore(out_debug, pixel_coord, vec4(normal, 0.0)); + } + if (debug.view_mode == DebugMode_HitConsistency) { + let reprojected = get_projected_pixel(camera, hit_position); + let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; + let camera_projection_diff = vec2(pixel_coord - reprojected); + let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); + textureStore(out_debug, pixel_coord, consistency); + } + } + + let prev_position = (entry.prev_object_to_world * position_object).xyz; + let prev_screen = get_projected_pixel_float(prev_camera, prev_position); + //TODO: consider just storing integers here? + //TODO: technically this "0.5" is just a waste compute on both packing and unpacking + rs.motion = prev_screen - vec2(pixel_coord) - 0.5; + rs.position = hit_position; + + // Write down the Surface + rs.inner.basis = basis; + rs.inner.flat_normal = flat_normal; + rs.inner.depth = intersection.t; + return rs; +} diff --git a/blade-render/code/motion.inc.wgsl b/blade-render/code/motion.inc.wgsl new file mode 100644 index 00000000..a9a9f48a --- /dev/null +++ b/blade-render/code/motion.inc.wgsl @@ -0,0 +1,2 @@ +const MOTION_SCALE: f32 = 0.02; +const USE_MOTION_VECTORS: bool = true; diff --git a/blade-render/code/random.inc.wgsl b/blade-render/code/random.inc.wgsl index 3f68478c..da144dbf 100644 --- a/blade-render/code/random.inc.wgsl +++ b/blade-render/code/random.inc.wgsl @@ -56,6 +56,10 @@ fn murmur3(rng: ptr) -> u32 { return hash; } +fn random_u32(rng: ptr) -> u32 { + return murmur3(rng); +} + fn random_gen(rng: ptr) -> f32 { let v = murmur3(rng); let one = bitcast(1.0); diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index b27b1065..db350f3f 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -1,3 +1,4 @@ +#include "color.inc.wgsl" #include "quaternion.inc.wgsl" #include "random.inc.wgsl" #include "env-importance.inc.wgsl" @@ -5,24 +6,22 @@ #include "debug-param.inc.wgsl" #include "camera.inc.wgsl" #include "surface.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: use proper WGSL -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; +#include "geometry.inc.wgsl" +#include "motion.inc.wgsl" const PI: f32 = 3.1415926; -const MAX_RESERVOIRS: u32 = 2u; +const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; -// Base MIS for canonical samples. The constant isolates a critical difference between -// Bitterli's pseudocode (where it's 1) and NVidia's RTXDI implementation (where it's 0). -// With Bitterli's 1 we have MIS not respecting the prior history enough. -const BASE_CANONICAL_MIS: f32 = 0.05; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; +//TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 +const GROUP_SIZE: vec2 = vec2(8, 4); +const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; + struct MainParams { frame_index: u32, num_environment_samples: u32, @@ -31,10 +30,11 @@ struct MainParams { temporal_history: u32, spatial_taps: u32, spatial_tap_history: u32, - spatial_radius: i32, + spatial_min_distance: i32, t_start: f32, use_motion_vectors: u32, -}; + grid_scale: vec2, +} var camera: CameraParams; var prev_camera: CameraParams; @@ -54,7 +54,14 @@ struct StoredReservoir { confidence: f32, } var reservoirs: array; -var prev_reservoirs: array; + +struct PixelCache { + surface: Surface, + reservoir: StoredReservoir, + //Note: we could store direction XY in local camera space instead + world_pos: vec3, +} +var pixel_cache: array; struct LightSample { radiance: vec3, @@ -83,12 +90,6 @@ fn get_reservoir_index(pixel: vec2, camera: CameraParams) -> i32 { } } -fn get_pixel_from_reservoir_index(index: i32, camera: CameraParams) -> vec2 { - let y = index / i32(camera.target_size.x); - let x = index - y * i32(camera.target_size.x); - return vec2(x, y); -} - fn bump_reservoir(r: ptr, history: f32) { (*r).history += history; } @@ -102,6 +103,7 @@ fn make_reservoir(ls: LightSample, light_index: u32, brdf: vec3) -> LiveRes r.history = 1.0; return r; } + fn merge_reservoir(r: ptr, other: LiveReservoir, random: f32) -> bool { (*r).weight_sum += other.weight_sum; (*r).history += other.history; @@ -115,12 +117,12 @@ fn merge_reservoir(r: ptr, other: LiveReservoir, random return false; } } -fn unpack_reservoir(f: StoredReservoir, max_history: u32) -> LiveReservoir { +fn unpack_reservoir(f: StoredReservoir, max_history: u32, radiance: vec3) -> LiveReservoir { var r: LiveReservoir; r.selected_light_index = f.light_index; r.selected_uv = f.light_uv; r.selected_target_score = f.target_score; - r.radiance = vec3(0.0); // to be continued... + r.radiance = radiance; let history = min(f.confidence, f32(max_history)); r.weight_sum = f.contribution_weight * f.target_score * history; r.history = history; @@ -140,13 +142,9 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { return pack_reservoir_detail(r, r.history); } -var t_depth: texture_2d; var t_prev_depth: texture_2d; -var t_basis: texture_2d; var t_prev_basis: texture_2d; -var t_flat_normal: texture_2d; var t_prev_flat_normal: texture_2d; -var t_motion: texture_2d; var out_diffuse: texture_storage_2d; var out_debug: texture_storage_2d; @@ -201,14 +199,6 @@ fn sample_light_from_environment(rng: ptr) -> LightSample return ls; } -fn read_surface(pixel: vec2) -> Surface { - var surface: Surface; - surface.basis = normalize(textureLoad(t_basis, pixel, 0)); - surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_depth, pixel, 0).x; - return surface; -} - fn read_prev_surface(pixel: vec2) -> Surface { var surface: Surface; surface.basis = normalize(textureLoad(t_prev_basis, pixel, 0)); @@ -217,6 +207,14 @@ fn read_prev_surface(pixel: vec2) -> Surface { return surface; } +fn thread_index_to_coord(thread_index: u32, group_id: vec3) -> vec2 { + let cluster_id = group_id.xy / parameters.grid_scale; + let cluster_offset = group_id.xy - cluster_id * parameters.grid_scale; + let local_id = vec2(thread_index % GROUP_SIZE.x, thread_index / GROUP_SIZE.x); + let global_id = (cluster_id * GROUP_SIZE + local_id) * parameters.grid_scale + cluster_offset; + return vec2(global_id); +} + fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { let lambert_brdf = 1.0 / PI; let lambert_term = qrot(qinv(surface.basis), dir).z; @@ -255,9 +253,8 @@ fn evaluate_reflected_light(surface: Surface, light_index: u32, light_uv: vec2, pos_world: vec3) -> vec2 { +fn get_prev_pixel(pixel: vec2, pos_world: vec3, motion: vec2) -> vec2 { if (USE_MOTION_VECTORS && parameters.use_motion_vectors != 0u) { - let motion = textureLoad(t_motion, pixel, 0).xy / MOTION_SCALE; return vec2(pixel) + 0.5 + motion; } else { return get_projected_pixel_float(prev_camera, pos_world); @@ -290,11 +287,11 @@ fn estimate_target_score_with_occlusion( if (check_ray_occluded(acs, position, direction, debug_len)) { return TargetScore(); - } else { - //Note: same as `evaluate_reflected_light` - let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; - return make_target_score(brdf * radiance); } + + //Note: same as `evaluate_reflected_light` + let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; + return make_target_score(brdf * radiance); } fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debug_len: f32) -> f32 { @@ -320,43 +317,11 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return brdf; } -struct HeuristicFactors { - weight: f32, - //history: f32, -} - -fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors { - var hf: HeuristicFactors; - let balance_denom = h0 * w0 + h1 * w1; - hf.weight = select(h0 * w0 / balance_denom, 0.0, balance_denom <= 0.0); - //hf.history = select(pow(clamp(w1 / w0, 0.0, 1.0), 8.0), 1.0, w0 <= 0.0); - return hf; -} - -struct RestirOutput { - radiance: vec3, -} - -fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, enable_debug: bool) -> RestirOutput { - if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); - } - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - if (surface.depth == 0.0) { - reservoirs[pixel_index] = StoredReservoir(); - let env = evaluate_environment(ray_dir); - return RestirOutput(env); - } - - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - let position = camera.position + surface.depth * ray_dir; - let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); - if (debug.view_mode == DebugMode_Normal) { - textureStore(out_debug, pixel, vec4(normal, 0.0)); - } - - var canonical = LiveReservoir(); +fn produce_canonical( + surface: Surface, position: vec3, + rng: ptr, debug_len: f32, +) -> LiveReservoir { + var reservoir = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; if (parameters.environment_importance_sampling != 0u) { @@ -368,156 +333,307 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0.0) { let other = make_reservoir(ls, 0u, vec3(brdf)); - merge_reservoir(&canonical, other, random_gen(rng)); + merge_reservoir(&reservoir, other, random_gen(rng)); } else { - bump_reservoir(&canonical, 1.0); + bump_reservoir(&reservoir, 1.0); } } + return reservoir; +} + +struct ResampleBase { + surface: Surface, + canonical: LiveReservoir, + world_pos: vec3, + accepted_count: f32, +} +struct ResampleResult { + selected: bool, + mis_canonical: f32, + mis_sample: f32, +} + +// Resample following Algorithm 8 in section 9.1 of Bitterli thesis +fn resample( + dst: ptr, color_and_weight: ptr>, + base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, + rng: ptr, debug_len: f32, +) -> ResampleResult { + var src: LiveReservoir; + let neighbor = other.reservoir; + var rr = ResampleResult(); + if (PAIRWISE_MIS) { + let canonical = base.canonical; + let neighbor_history = min(neighbor.confidence, f32(max_history)); + { // scoping this to hint the register allocation + let t_canonical_at_neighbor = estimate_target_score_with_occlusion( + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); + let nom = canonical.selected_target_score * canonical.history / base.accepted_count; + let denom = t_canonical_at_neighbor.score * neighbor_history + nom; + rr.mis_canonical = select(0.0, nom / denom, denom > 0.0); + } - //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(pixel, position)); + // Notes about t_neighbor_at_neighbor: + // 1. we assume lights aren't moving. Technically we should check if the + // target light has moved, and re-evaluate the occlusion. + // 2. we can use the cached target score, and there is no use of the target color + //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); + let t_neighbor_at_canonical = estimate_target_score_with_occlusion( + base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); + let nom = neighbor.target_score * neighbor_history; + let denom = nom + t_neighbor_at_canonical.score * canonical.history / base.accepted_count; + let mis_neighbor = select(0.0, nom / denom, denom > 0.0); + rr.mis_sample = mis_neighbor; + + src.history = neighbor_history; + src.selected_light_index = neighbor.light_index; + src.selected_uv = neighbor.light_uv; + src.selected_target_score = t_neighbor_at_canonical.score; + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor; + src.radiance = t_neighbor_at_canonical.color; + } else { + rr.mis_canonical = 0.0; + rr.mis_sample = 1.0; + let radiance = evaluate_reflected_light(base.surface, neighbor.light_index, neighbor.light_uv); + src = unpack_reservoir(neighbor, max_history, radiance); + } - // First, gather the list of reservoirs to merge with - var accepted_reservoir_indices = array(); - var accepted_count = 0u; - var temporal_index = ~0u; - for (var tap = 0u; tap <= parameters.spatial_taps; tap += 1u) { - var other_pixel = prev_pixel; - if (tap != 0u) { - let r0 = max(prev_pixel - vec2(parameters.spatial_radius), vec2(0)); - let r1 = min(prev_pixel + vec2(parameters.spatial_radius + 1), vec2(prev_camera.target_size)); - other_pixel = vec2(mix(vec2(r0), vec2(r1), vec2(random_gen(rng), random_gen(rng)))); - } else if (parameters.temporal_tap == 0u) - { - continue; - } + if (DECOUPLED_SHADING) { + //TODO: use `mis_neighbor`O + *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); + } + if (src.weight_sum <= 0.0) { + bump_reservoir(dst, src.history); + } else { + merge_reservoir(dst, src, random_gen(rng)); + rr.selected = true; + } + return rr; +} - let other_index = get_reservoir_index(other_pixel, prev_camera); - if (other_index < 0) { - continue; - } - if (prev_reservoirs[other_index].confidence == 0.0) { - continue; - } +struct ResampleOutput { + reservoir: StoredReservoir, + color: vec3, +} - let other_surface = read_prev_surface(other_pixel); - let compatibility = compare_surfaces(surface, other_surface); - if (compatibility < 0.1) { - // if the surfaces are too different, there is no trust in this sample - continue; - } +fn revive_canonical(ro: ResampleOutput) -> LiveReservoir { + let radiance = select(vec3(0.0), ro.color / ro.reservoir.contribution_weight, ro.reservoir.contribution_weight > 0.0); + return unpack_reservoir(ro.reservoir, ~0u, radiance); +} - if (tap == 0u) { - temporal_index = accepted_count; - } - accepted_reservoir_indices[accepted_count] = other_index; - if (accepted_count < MAX_RESERVOIRS) { - accepted_count += 1u; +fn finalize_canonical(reservoir: LiveReservoir) -> ResampleOutput { + var ro = ResampleOutput(); + ro.reservoir = pack_reservoir(reservoir); + ro.color = ro.reservoir.contribution_weight * reservoir.radiance; + return ro; +} + +fn finalize_resampling( + reservoir: ptr, color_and_weight: ptr>, + base: ResampleBase, mis_canonical: f32, rng: ptr, +) -> ResampleOutput { + var ro = ResampleOutput(); + var canonical = base.canonical; + canonical.weight_sum *= mis_canonical / canonical.history; + merge_reservoir(reservoir, canonical, random_gen(rng)); + + if (base.accepted_count > 0.0) { + let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, PAIRWISE_MIS); + ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); + } else { + ro.reservoir = pack_reservoir(canonical); + } + + if (DECOUPLED_SHADING) { + if (canonical.selected_target_score > 0.0) { + let contribution_weight = canonical.weight_sum / canonical.selected_target_score; + *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); } + ro.color = (*color_and_weight).xyz / max((*color_and_weight).w, 0.001); + } else { + ro.color = ro.reservoir.contribution_weight * (*reservoir).radiance; + } + return ro; +} + +fn resample_temporal( + surface: Surface, motion: vec2, cur_pixel: vec2, position: vec3, + rng: ptr, debug_len: f32, +) -> ResampleOutput { + if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { + textureStore(out_debug, cur_pixel, vec4(0.0)); + } + if (surface.depth == 0.0) { + return ResampleOutput(); + } + let canonical = produce_canonical(surface, position, rng, debug_len); + + //TODO: find best match in a 2x2 grid + let prev_pixel = vec2(get_prev_pixel(cur_pixel, position, motion)); + + let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); + if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { + return finalize_canonical(canonical); + } + + let prev_reservoir = reservoirs[prev_reservoir_index]; + let prev_surface = read_prev_surface(prev_pixel); + // if the surfaces are too different, there is no trust in this sample + if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { + return finalize_canonical(canonical); } - // Next, evaluate the MIS of each of the samples versus the canonical one. var reservoir = LiveReservoir(); - var shaded_color = vec3(0.0); - var mis_canonical = BASE_CANONICAL_MIS; var color_and_weight = vec4(0.0); - for (var rid = 0u; rid < accepted_count; rid += 1u) { - let neighbor_index = accepted_reservoir_indices[rid]; - let neighbor = prev_reservoirs[neighbor_index]; - - let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, rid == temporal_index); - var other: LiveReservoir; - if (PAIRWISE_MIS) { - let neighbor_pixel = get_pixel_from_reservoir_index(neighbor_index, prev_camera); - let neighbor_history = min(neighbor.confidence, f32(max_history)); - { // scoping this to hint the register allocation - let neighbor_surface = read_prev_surface(neighbor_pixel); - let neighbor_dir = get_ray_direction(prev_camera, neighbor_pixel); - let neighbor_position = prev_camera.position + neighbor_surface.depth * neighbor_dir; - - let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - neighbor_surface, neighbor_position, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); - let mis_sub_canonical = balance_heuristic( - t_canonical_at_neighbor.score, canonical.selected_target_score, - neighbor_history * f32(accepted_count), canonical.history); - mis_canonical += 1.0 - mis_sub_canonical.weight; - } - - // Notes about t_neighbor_at_neighbor: - // 1. we assume lights aren't moving. Technically we should check if the - // target light has moved, and re-evaluate the occlusion. - // 2. we can use the cached target score, and there is no use of the target color - //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); - let t_neighbor_at_canonical = estimate_target_score_with_occlusion( - surface, position, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let mis_neighbor = balance_heuristic( - neighbor.target_score, t_neighbor_at_canonical.score, - neighbor_history * f32(accepted_count), canonical.history); - - other.history = neighbor_history; - other.selected_light_index = neighbor.light_index; - other.selected_uv = neighbor.light_uv; - other.selected_target_score = t_neighbor_at_canonical.score; - other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; - //Note: should be needed according to the paper - // other.history *= min(mis_neighbor.history, mis_sub_canonical.history); - other.radiance = t_neighbor_at_canonical.color; - } else { - other = unpack_reservoir(neighbor, max_history); - other.radiance = evaluate_reflected_light(surface, other.selected_light_index, other.selected_uv); + let base = ResampleBase(surface, canonical, position, 1.0); + + let prev_dir = get_ray_direction(prev_camera, prev_pixel); + let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; + let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, debug_len); + let mis_canonical = 1.0 + rr.mis_canonical; + + if (debug.view_mode == DebugMode_TemporalMatch) { + textureStore(out_debug, cur_pixel, vec4(1.0)); + } + if (debug.view_mode == DebugMode_TemporalMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); + } + if (debug.view_mode == DebugMode_TemporalMisError) { + let total = mis_canonical + rr.mis_sample; + textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + } + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); +} + +fn resample_spatial( + surface: Surface, cur_pixel: vec2, position: vec3, + group_id: vec3, canonical: LiveReservoir, + rng: ptr, debug_len: f32, +) -> ResampleOutput { + if (surface.depth == 0.0) { + if (debug.view_mode == DebugMode_SpatialMatch || debug.view_mode == DebugMode_SpatialMisCanonical || debug.view_mode == DebugMode_SpatialMisError) { + textureStore(out_debug, cur_pixel, vec4(0.0)); } + let dir = normalize(position - camera.position); + var ro = ResampleOutput(); + ro.color = evaluate_environment(dir); + return ro; + } - if (DECOUPLED_SHADING) { - color_and_weight += other.weight_sum * vec4(neighbor.contribution_weight * other.radiance, 1.0); + // gather the list of neighbors (within the workgroup) to resample. + var accepted_count = 0u; + var accepted_local_indices = array(); + let max_accepted = min(MAX_RESAMPLE, parameters.spatial_taps); + let num_candidates = parameters.spatial_taps * 3u; + for (var i = 0u; i < num_candidates && accepted_count < max_accepted; i += 1u) { + let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; + let diff = thread_index_to_coord(other_cache_index, group_id) - cur_pixel; + if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { + continue; } - if (other.weight_sum <= 0.0) { - bump_reservoir(&reservoir, other.history); - } else { - merge_reservoir(&reservoir, other, random_gen(rng)); + let other = pixel_cache[other_cache_index]; + // if the surfaces are too different, there is no trust in this sample + if (other.reservoir.confidence > 0.0 && compare_surfaces(surface, other.surface) > 0.1) { + accepted_local_indices[accepted_count] = other_cache_index; + accepted_count += 1u; } } - // Finally, merge in the canonical sample - if (PAIRWISE_MIS) { - canonical.weight_sum *= mis_canonical / canonical.history; - } - if (DECOUPLED_SHADING) { - //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + var reservoir = LiveReservoir(); + var color_and_weight = vec4(0.0); + let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + var mis_canonical = 1.0; + var mis_sample_sum = 0.0; + + // evaluate the MIS of each of the samples versus the canonical one. + for (var lid = 0u; lid < accepted_count; lid += 1u) { + let other = pixel_cache[accepted_local_indices[lid]]; + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, debug_len); + mis_canonical += rr.mis_canonical; + mis_sample_sum += rr.mis_sample; } - merge_reservoir(&reservoir, canonical, random_gen(rng)); - let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS); - let stored = pack_reservoir_detail(reservoir, effective_history); - reservoirs[pixel_index] = stored; - var ro = RestirOutput(); - if (DECOUPLED_SHADING) { - ro.radiance = color_and_weight.xyz / max(color_and_weight.w, 0.001); - } else { - ro.radiance = stored.contribution_weight * reservoir.radiance; + if (debug.view_mode == DebugMode_SpatialMatch) { + let value = f32(accepted_count) / max(1.0, f32(parameters.spatial_taps)); + textureStore(out_debug, cur_pixel, vec4(value)); } - return ro; + if (debug.view_mode == DebugMode_SpatialMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); + } + if (debug.view_mode == DebugMode_SpatialMisError) { + let total = mis_canonical + mis_sample_sum; + textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + } + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { - if (any(global_id.xy >= camera.target_size)) { +fn compute_restir( + rs: RichSurface, + pixel: vec2, local_index: u32, group_id: vec3, + rng: ptr, enable_debug: bool, +) -> vec3 { + let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); + + let temporal = resample_temporal(rs.inner, rs.motion, pixel, rs.position, rng, debug_len); + pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); + + // sync with the workgroup to ensure all reservoirs are available. + workgroupBarrier(); + + let temporal_live = revive_canonical(temporal); + let spatial = resample_spatial(rs.inner, pixel, rs.position, group_id, temporal_live, rng, debug_len); + + let pixel_index = get_reservoir_index(pixel, camera); + reservoirs[pixel_index] = spatial.reservoir; + return spatial.color; +} + +var out_depth: texture_storage_2d; +var out_basis: texture_storage_2d; +var out_flat_normal: texture_storage_2d; +var out_albedo: texture_storage_2d; +var out_motion: texture_storage_2d; + +@compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) +fn main( + @builtin(workgroup_id) group_id: vec3, + @builtin(local_invocation_index) local_index: u32, +) { + pixel_cache[local_index] = PixelCache(); + let pixel_coord = thread_index_to_coord(local_index, group_id); + if (any(vec2(pixel_coord) >= camera.target_size)) { + return; + } + + if (debug.view_mode == DebugMode_Grouping) { + var rng = random_init(group_id.y * 1000u + group_id.x, 0u); + let h = random_gen(&rng) * 360.0; + let color = hsv_to_rgb(h, 0.5, 1.0); + textureStore(out_debug, pixel_coord, vec4(color, 1.0)); return; } - let global_index = global_id.y * camera.target_size.x + global_id.x; + let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); + let rs = fetch_geometry(pixel_coord, true, enable_debug); + + // TODO: option to avoid writing data for the sky + textureStore(out_depth, pixel_coord, vec4(rs.inner.depth, 0.0, 0.0, 0.0)); + textureStore(out_basis, pixel_coord, rs.inner.basis); + textureStore(out_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); + textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); + textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); + + let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(vec2(global_id.xy)); - let enable_debug = all(global_id.xy == debug.mouse_pos); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(global_id.xy), &rng, enable_restir_debug); - let color = ro.radiance; + let color = compute_restir(rs, pixel_coord, local_index, group_id, &rng, enable_restir_debug); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); + if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; debug_buf.variance.count += 1u; } - textureStore(out_diffuse, global_id.xy, vec4(color, 1.0)); } diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index c856538f..e0e724c5 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -51,7 +51,14 @@ pub enum DebugMode { Normal = 2, Motion = 3, HitConsistency = 4, - Variance = 5, + Grouping = 5, + TemporalMatch = 10, + TemporalMisCanonical = 11, + TemporalMisError = 12, + SpatialMatch = 13, + SpatialMisCanonical = 14, + SpatialMisError = 15, + Variance = 100, } impl Default for DebugMode { @@ -93,7 +100,11 @@ pub struct RayConfig { pub temporal_history: u32, pub spatial_taps: u32, pub spatial_tap_history: u32, - pub spatial_radius: u32, + /// Minimal distance to a spatially reused pixel (in the current frame). + pub spatial_min_distance: u32, + /// Scale and mix the groups into clusters, to allow spatial samples to mix + /// outside of the original workgroup pixel bounds. + pub group_mixer: u32, pub t_start: f32, } @@ -199,7 +210,7 @@ impl RenderTarget { } struct RestirTargets { - reservoir_buf: [blade_graphics::Buffer; 2], + reservoir_buf: blade_graphics::Buffer, debug: RenderTarget<1>, depth: RenderTarget<2>, basis: RenderTarget<2>, @@ -218,14 +229,11 @@ impl RestirTargets { gpu: &blade_graphics::Context, ) -> Self { let total_reservoirs = size.width as usize * size.height as usize; - let mut reservoir_buf = [blade_graphics::Buffer::default(); 2]; - for (i, rb) in reservoir_buf.iter_mut().enumerate() { - *rb = gpu.create_buffer(blade_graphics::BufferDesc { - name: &format!("reservoirs{i}"), - size: reservoir_size as u64 * total_reservoirs as u64, - memory: blade_graphics::Memory::Device, - }); - } + let reservoir_buf = gpu.create_buffer(blade_graphics::BufferDesc { + name: "reservoirs", + size: reservoir_size as u64 * total_reservoirs as u64, + memory: blade_graphics::Memory::Device, + }); Self { reservoir_buf, @@ -277,9 +285,7 @@ impl RestirTargets { } fn destroy(&self, gpu: &blade_graphics::Context) { - for rb in self.reservoir_buf.iter() { - gpu.destroy_buffer(*rb); - } + gpu.destroy_buffer(self.reservoir_buf); self.debug.destroy(gpu); self.depth.destroy(gpu); self.basis.destroy(gpu); @@ -308,7 +314,6 @@ pub struct Renderer { shaders: Shaders, targets: RestirTargets, post_proc_input_index: usize, - fill_pipeline: blade_graphics::ComputePipeline, main_pipeline: blade_graphics::ComputePipeline, post_proc_pipeline: blade_graphics::RenderPipeline, blur: Blur, @@ -363,53 +368,38 @@ struct MainParams { temporal_history: u32, spatial_taps: u32, spatial_tap_history: u32, - spatial_radius: u32, + spatial_min_distance: u32, t_start: f32, use_motion_vectors: u32, + grid_scale: [u32; 2], } #[derive(blade_macros::ShaderData)] -struct FillData<'a> { +struct MainData<'a> { camera: CameraParams, prev_camera: CameraParams, debug: DebugParams, + parameters: MainParams, acc_struct: blade_graphics::AccelerationStructure, + prev_acc_struct: blade_graphics::AccelerationStructure, hit_entries: blade_graphics::BufferPiece, index_buffers: &'a blade_graphics::BufferArray, vertex_buffers: &'a blade_graphics::BufferArray, textures: &'a blade_graphics::TextureArray, sampler_linear: blade_graphics::Sampler, - debug_buf: blade_graphics::BufferPiece, - out_depth: blade_graphics::TextureView, - out_basis: blade_graphics::TextureView, - out_flat_normal: blade_graphics::TextureView, - out_albedo: blade_graphics::TextureView, - out_motion: blade_graphics::TextureView, - out_debug: blade_graphics::TextureView, -} - -#[derive(blade_macros::ShaderData)] -struct MainData { - camera: CameraParams, - prev_camera: CameraParams, - debug: DebugParams, - parameters: MainParams, - acc_struct: blade_graphics::AccelerationStructure, - prev_acc_struct: blade_graphics::AccelerationStructure, - sampler_linear: blade_graphics::Sampler, sampler_nearest: blade_graphics::Sampler, env_map: blade_graphics::TextureView, env_weights: blade_graphics::TextureView, - t_depth: blade_graphics::TextureView, t_prev_depth: blade_graphics::TextureView, - t_basis: blade_graphics::TextureView, t_prev_basis: blade_graphics::TextureView, - t_flat_normal: blade_graphics::TextureView, t_prev_flat_normal: blade_graphics::TextureView, - t_motion: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, - prev_reservoirs: blade_graphics::BufferPiece, + out_depth: blade_graphics::TextureView, + out_basis: blade_graphics::TextureView, + out_flat_normal: blade_graphics::TextureView, + out_albedo: blade_graphics::TextureView, + out_motion: blade_graphics::TextureView, out_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -487,7 +477,6 @@ struct HitEntry { #[derive(Clone, PartialEq)] pub struct Shaders { env_prepare: blade_asset::Handle, - fill_gbuf: blade_asset::Handle, ray_trace: blade_asset::Handle, blur: blade_asset::Handle, post_proc: blade_asset::Handle, @@ -500,7 +489,6 @@ impl Shaders { let mut ctx = asset_hub.open_context(path, "shader finish"); let shaders = Self { env_prepare: ctx.load_shader("env-prepare.wgsl"), - fill_gbuf: ctx.load_shader("fill-gbuf.wgsl"), ray_trace: ctx.load_shader("ray-trace.wgsl"), blur: ctx.load_shader("blur.wgsl"), post_proc: ctx.load_shader("post-proc.wgsl"), @@ -512,7 +500,6 @@ impl Shaders { } struct ShaderPipelines { - fill: blade_graphics::ComputePipeline, main: blade_graphics::ComputePipeline, temporal_accum: blade_graphics::ComputePipeline, atrous: blade_graphics::ComputePipeline, @@ -522,19 +509,6 @@ struct ShaderPipelines { } impl ShaderPipelines { - fn create_gbuf_fill( - shader: &blade_graphics::Shader, - gpu: &blade_graphics::Context, - ) -> blade_graphics::ComputePipeline { - shader.check_struct_size::(); - shader.check_struct_size::(); - let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "fill-gbuf", - data_layouts: &[&layout], - compute: shader.at("main"), - }) - } fn create_ray_trace( shader: &blade_graphics::Shader, gpu: &blade_graphics::Context, @@ -545,11 +519,17 @@ impl ShaderPipelines { shader.check_struct_size::(); shader.check_struct_size::(); let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { + let pipeline = gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { name: "ray-trace", data_layouts: &[&layout], compute: shader.at("main"), - }) + }); + + let pl_struct_size = shader.get_struct_size("PixelCache"); + let group_size = pipeline.get_workgroup_size(); + let wg_required = pl_struct_size * group_size[0] * group_size[1]; + log::info!("Using {} workgroup memory for RT", wg_required); + pipeline } fn create_temporal_accum( @@ -606,7 +586,6 @@ impl ShaderPipelines { let sh_main = shader_man[shaders.ray_trace].raw.as_ref().unwrap(); let sh_blur = shader_man[shaders.blur].raw.as_ref().unwrap(); Ok(Self { - fill: Self::create_gbuf_fill(shader_man[shaders.fill_gbuf].raw.as_ref().unwrap(), gpu), main: Self::create_ray_trace(sh_main, gpu), temporal_accum: Self::create_temporal_accum(sh_blur, gpu), atrous: Self::create_atrous(sh_blur, gpu), @@ -697,7 +676,6 @@ impl Renderer { shaders, targets, post_proc_input_index: 0, - fill_pipeline: sp.fill, main_pipeline: sp.main, post_proc_pipeline: sp.post_proc, blur: Blur { @@ -744,7 +722,6 @@ impl Renderer { // pipelines gpu.destroy_compute_pipeline(&mut self.blur.temporal_accum_pipeline); gpu.destroy_compute_pipeline(&mut self.blur.atrous_pipeline); - gpu.destroy_compute_pipeline(&mut self.fill_pipeline); gpu.destroy_compute_pipeline(&mut self.main_pipeline); gpu.destroy_render_pipeline(&mut self.post_proc_pipeline); } @@ -759,7 +736,6 @@ impl Renderer { let mut tasks = Vec::new(); let old = self.shaders.clone(); - tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.fill_gbuf)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.ray_trace)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.blur)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.post_proc)); @@ -776,11 +752,6 @@ impl Renderer { let _ = task.join(); } - if self.shaders.fill_gbuf != old.fill_gbuf { - if let Ok(ref shader) = asset_hub.shaders[self.shaders.fill_gbuf].raw { - self.fill_pipeline = ShaderPipelines::create_gbuf_fill(shader, gpu); - } - } if self.shaders.ray_trace != old.ray_trace { if let Ok(ref shader) = asset_hub.shaders[self.shaders.ray_trace].raw { assert_eq!( @@ -1089,13 +1060,11 @@ impl Renderer { self.debug.reset_lines(&mut transfer); } let total_reservoirs = self.surface_size.width as u64 * self.surface_size.height as u64; - for reservoir_buf in self.targets.reservoir_buf.iter() { - transfer.fill_buffer( - reservoir_buf.at(0), - total_reservoirs * self.reservoir_size as u64, - 0, - ); - } + transfer.fill_buffer( + self.targets.reservoir_buf.at(0), + total_reservoirs * self.reservoir_size as u64, + 0, + ); } if !config.frozen { @@ -1119,35 +1088,26 @@ impl Renderer { let (cur, prev) = self.work_indices(); if let mut pass = command_encoder.compute() { - let mut pc = pass.with(&self.fill_pipeline); - let groups = self.fill_pipeline.get_dispatch_for(self.surface_size); - pc.bind( - 0, - &FillData { - camera: self.targets.camera_params[cur], - prev_camera: self.targets.camera_params[prev], - debug, - acc_struct: self.acceleration_structure, - hit_entries: self.hit_buffer.into(), - index_buffers: &self.index_buffers, - vertex_buffers: &self.vertex_buffers, - textures: &self.textures, - sampler_linear: self.samplers.linear, - debug_buf: self.debug.buffer_resource(), - out_depth: self.targets.depth.views[cur], - out_basis: self.targets.basis.views[cur], - out_flat_normal: self.targets.flat_normal.views[cur], - out_albedo: self.targets.albedo.views[0], - out_motion: self.targets.motion.views[0], - out_debug: self.targets.debug.views[0], - }, - ); - pc.dispatch(groups); - } - - if let mut pass = command_encoder.compute() { + let grid_scale = { + let limit = ray_config.group_mixer; + let r = self.frame_index as u32 ^ 0x5A; + [r % limit + 1, (r / limit) % limit + 1] + }; + let groups = { + let wg_size = self.main_pipeline.get_workgroup_size(); + let cluster_size = [ + wg_size[0] * grid_scale[0], + wg_size[1] * grid_scale[1], + wg_size[2], + ]; + let clusters = self.surface_size.group_by(cluster_size); + [ + clusters[0] * grid_scale[0], + clusters[1] * grid_scale[1], + clusters[2], + ] + }; let mut pc = pass.with(&self.main_pipeline); - let groups = self.main_pipeline.get_dispatch_for(self.surface_size); pc.bind( 0, &MainData { @@ -1163,9 +1123,10 @@ impl Renderer { temporal_history: ray_config.temporal_history, spatial_taps: ray_config.spatial_taps, spatial_tap_history: ray_config.spatial_tap_history, - spatial_radius: ray_config.spatial_radius, + spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, + grid_scale, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index @@ -1176,20 +1137,24 @@ impl Renderer { } else { self.prev_acceleration_structure }, + hit_entries: self.hit_buffer.into(), + index_buffers: &self.index_buffers, + vertex_buffers: &self.vertex_buffers, + textures: &self.textures, sampler_linear: self.samplers.linear, sampler_nearest: self.samplers.nearest, env_map: self.env_map.main_view, env_weights: self.env_map.weight_view, - t_depth: self.targets.depth.views[cur], t_prev_depth: self.targets.depth.views[prev], - t_basis: self.targets.basis.views[cur], t_prev_basis: self.targets.basis.views[prev], - t_flat_normal: self.targets.flat_normal.views[cur], t_prev_flat_normal: self.targets.flat_normal.views[prev], - t_motion: self.targets.motion.views[0], debug_buf: self.debug.buffer_resource(), - reservoirs: self.targets.reservoir_buf[cur].into(), - prev_reservoirs: self.targets.reservoir_buf[prev].into(), + reservoirs: self.targets.reservoir_buf.into(), + out_depth: self.targets.depth.views[cur], + out_basis: self.targets.basis.views[cur], + out_flat_normal: self.targets.flat_normal.views[cur], + out_albedo: self.targets.albedo.views[0], + out_motion: self.targets.motion.views[0], out_diffuse: self.targets.light_diffuse.views[cur], out_debug: self.targets.debug.views[0], }, diff --git a/examples/scene/main.rs b/examples/scene/main.rs index f09de24a..903a30ca 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -264,7 +264,8 @@ impl Example { temporal_history: 10, spatial_taps: 1, spatial_tap_history: 5, - spatial_radius: 10, + spatial_min_distance: 4, + group_mixer: 10, t_start: 0.1, }, denoiser_enabled: true, @@ -582,6 +583,8 @@ impl Example { return; } + ui.checkbox(&mut self.track_hot_reloads, "Hot reloading"); + let mut selection = blade_render::SelectionInfo::default(); if self.debug.mouse_pos.is_some() { selection = self.renderer.read_debug_selection_info(); diff --git a/src/lib.rs b/src/lib.rs index 5c8320b6..b14b8ebe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -489,7 +489,8 @@ impl Engine { temporal_history: 10, spatial_taps: 1, spatial_tap_history: 5, - spatial_radius: 10, + spatial_min_distance: 4, + group_mixer: 10, t_start: 0.01, }, denoiser_enabled: true,