From a7eea0d329e85c66a5ace7622b865d08d70269df Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 18 Aug 2024 22:48:11 -0700 Subject: [PATCH 01/12] WIP local storage for reservoirs --- blade-render/code/random.inc.wgsl | 4 + blade-render/code/ray-trace.wgsl | 238 +++++++++++++++++------------- 2 files changed, 138 insertions(+), 104 deletions(-) diff --git a/blade-render/code/random.inc.wgsl b/blade-render/code/random.inc.wgsl index 3f68478c..da144dbf 100644 --- a/blade-render/code/random.inc.wgsl +++ b/blade-render/code/random.inc.wgsl @@ -56,6 +56,10 @@ fn murmur3(rng: ptr) -> u32 { return hash; } +fn random_u32(rng: ptr) -> u32 { + return murmur3(rng); +} + fn random_gen(rng: ptr) -> f32 { let v = murmur3(rng); let one = bitcast(1.0); diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index b27b1065..1a5b1736 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -7,11 +7,11 @@ #include "surface.inc.wgsl" #include "gbuf.inc.wgsl" -//TODO: use proper WGSL +//TODO: https://github.com/gfx-rs/wgpu/pull/5429 const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; const PI: f32 = 3.1415926; -const MAX_RESERVOIRS: u32 = 2u; +const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; @@ -23,6 +23,9 @@ const BASE_CANONICAL_MIS: f32 = 0.05; // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; +const GROUP_SIZE: vec2 = vec2(8, 8); +const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; + struct MainParams { frame_index: u32, num_environment_samples: u32, @@ -34,7 +37,7 @@ struct MainParams { spatial_radius: i32, t_start: f32, use_motion_vectors: u32, -}; +} var camera: CameraParams; var prev_camera: CameraParams; @@ -56,6 +59,14 @@ struct StoredReservoir { var reservoirs: array; var prev_reservoirs: array; +struct PixelCache { + surface: Surface, + reservoir: StoredReservoir, + //Note: we could store direction XY in local camera space instead + world_pos: vec3, +} +var pixel_cache: array; + struct LightSample { radiance: vec3, pdf: f32, @@ -333,14 +344,77 @@ fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors { return hf; } +struct ResampleBase { + surface: Surface, + canonical: LiveReservoir, + world_pos: vec3, + accepted_count: f32, +} +struct ResampleState { + reservoir: LiveReservoir, + mis_canonical: f32, + color_and_weight: vec4, +} + +/*fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { + var live: LiveReservoir; + let neighbor = other.reservoir; + if (PAIRWISE_MIS) { + let debug_len = select(0.0, other.surface.depth * 0.2, enable_debug); + let canonical = base.canonical; + let neighbor_history = min(neighbor.confidence, f32(max_history)); + { // scoping this to hint the register allocation + let t_canonical_at_neighbor = estimate_target_score_with_occlusion( + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); + let mis_sub_canonical = balance_heuristic( + t_canonical_at_neighbor.score, canonical.selected_target_score, + neighbor_history * base.accepted_count, canonical.history); + (*state).mis_canonical += 1.0 - mis_sub_canonical.weight; + } + + // Notes about t_neighbor_at_neighbor: + // 1. we assume lights aren't moving. Technically we should check if the + // target light has moved, and re-evaluate the occlusion. + // 2. we can use the cached target score, and there is no use of the target color + //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); + let t_neighbor_at_canonical = estimate_target_score_with_occlusion( + base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); + let mis_neighbor = balance_heuristic( + neighbor.target_score, t_neighbor_at_canonical.score, + neighbor_history * base.accepted_count, canonical.history); + + live.history = neighbor_history; + live.selected_light_index = neighbor.light_index; + live.selected_uv = neighbor.light_uv; + live.selected_target_score = t_neighbor_at_canonical.score; + live.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; + //Note: should be needed according to the paper + // live.history *= min(mis_neighbor.history, mis_sub_canonical.history); + live.radiance = t_neighbor_at_canonical.color; + } else { + live = unpack_reservoir(neighbor, max_history); + live.radiance = evaluate_reflected_light(base.surface, live.selected_light_index, live.selected_uv); + } + + if (DECOUPLED_SHADING) { + (*state).color_and_weight += live.weight_sum * vec4(neighbor.contribution_weight * live.radiance, 1.0); + } + if (live.weight_sum <= 0.0) { + bump_reservoir(&(*state).reservoir, live.history); + } else { + merge_reservoir(&(*state).reservoir, live, random_gen(rng)); + } +}*/ + struct RestirOutput { radiance: vec3, } -fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, enable_debug: bool) -> RestirOutput { +fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, local_index: u32, enable_debug: bool) -> RestirOutput { if (debug.view_mode == DebugMode_Depth) { textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } + pixel_cache[local_index] = PixelCache(); let ray_dir = get_ray_direction(camera, pixel); let pixel_index = get_reservoir_index(pixel, camera); if (surface.depth == 0.0) { @@ -356,6 +430,7 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(normal, 0.0)); } + // 1: build the canonical sample var canonical = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; @@ -376,132 +451,87 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(get_prev_pixel(pixel, position)); - - // First, gather the list of reservoirs to merge with - var accepted_reservoir_indices = array(); var accepted_count = 0u; - var temporal_index = ~0u; - for (var tap = 0u; tap <= parameters.spatial_taps; tap += 1u) { - var other_pixel = prev_pixel; - if (tap != 0u) { - let r0 = max(prev_pixel - vec2(parameters.spatial_radius), vec2(0)); - let r1 = min(prev_pixel + vec2(parameters.spatial_radius + 1), vec2(prev_camera.target_size)); - other_pixel = vec2(mix(vec2(r0), vec2(r1), vec2(random_gen(rng), random_gen(rng)))); - } else if (parameters.temporal_tap == 0u) - { - continue; - } - - let other_index = get_reservoir_index(other_pixel, prev_camera); - if (other_index < 0) { - continue; - } - if (prev_reservoirs[other_index].confidence == 0.0) { - continue; - } - - let other_surface = read_prev_surface(other_pixel); - let compatibility = compare_surfaces(surface, other_surface); - if (compatibility < 0.1) { + var accepted_local_indices = array(); + + // 2: read the temporal sample. + let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); + if (prev_reservoir_index >= 0) { + let prev_reservoir = prev_reservoirs[prev_reservoir_index]; + let prev_surface = read_prev_surface(prev_pixel); + let prev_dir = get_ray_direction(prev_camera, prev_pixel); + let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; + pixel_cache[local_index] = PixelCache(prev_surface, prev_reservoir, prev_world_pos); + + if (parameters.temporal_tap != 0u && prev_reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample - continue; - } - - if (tap == 0u) { - temporal_index = accepted_count; - } - accepted_reservoir_indices[accepted_count] = other_index; - if (accepted_count < MAX_RESERVOIRS) { - accepted_count += 1u; + if (compare_surfaces(surface, prev_surface) > 0.1) { + accepted_local_indices[0] = local_index; + accepted_count = 1u; + } } } - // Next, evaluate the MIS of each of the samples versus the canonical one. - var reservoir = LiveReservoir(); - var shaded_color = vec3(0.0); - var mis_canonical = BASE_CANONICAL_MIS; - var color_and_weight = vec4(0.0); - for (var rid = 0u; rid < accepted_count; rid += 1u) { - let neighbor_index = accepted_reservoir_indices[rid]; - let neighbor = prev_reservoirs[neighbor_index]; - - let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, rid == temporal_index); - var other: LiveReservoir; - if (PAIRWISE_MIS) { - let neighbor_pixel = get_pixel_from_reservoir_index(neighbor_index, prev_camera); - let neighbor_history = min(neighbor.confidence, f32(max_history)); - { // scoping this to hint the register allocation - let neighbor_surface = read_prev_surface(neighbor_pixel); - let neighbor_dir = get_ray_direction(prev_camera, neighbor_pixel); - let neighbor_position = prev_camera.position + neighbor_surface.depth * neighbor_dir; - - let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - neighbor_surface, neighbor_position, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); - let mis_sub_canonical = balance_heuristic( - t_canonical_at_neighbor.score, canonical.selected_target_score, - neighbor_history * f32(accepted_count), canonical.history); - mis_canonical += 1.0 - mis_sub_canonical.weight; - } + // 3: sync with the workgroup to ensure all reservoirs are available. + workgroupBarrier(); - // Notes about t_neighbor_at_neighbor: - // 1. we assume lights aren't moving. Technically we should check if the - // target light has moved, and re-evaluate the occlusion. - // 2. we can use the cached target score, and there is no use of the target color - //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); - let t_neighbor_at_canonical = estimate_target_score_with_occlusion( - surface, position, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let mis_neighbor = balance_heuristic( - neighbor.target_score, t_neighbor_at_canonical.score, - neighbor_history * f32(accepted_count), canonical.history); - - other.history = neighbor_history; - other.selected_light_index = neighbor.light_index; - other.selected_uv = neighbor.light_uv; - other.selected_target_score = t_neighbor_at_canonical.score; - other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; - //Note: should be needed according to the paper - // other.history *= min(mis_neighbor.history, mis_sub_canonical.history); - other.radiance = t_neighbor_at_canonical.color; - } else { - other = unpack_reservoir(neighbor, max_history); - other.radiance = evaluate_reflected_light(surface, other.selected_light_index, other.selected_uv); + // 4: gather the list of neighbors (withing the workgroup) to resample. + let max_accepted = min(MAX_RESAMPLE, accepted_count + parameters.spatial_taps); + let num_candidates = parameters.spatial_taps * 2u; + for (var candidates = num_candidates; candidates > 0u && accepted_count < max_accepted; candidates -= 1u) { + let other_cache_index = random_u32(rng) % (GROUP_SIZE.x * GROUP_SIZE.y); + let other = pixel_cache[other_cache_index]; + if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { + // if the surfaces are too different, there is no trust in this sample + //if (compare_surfaces(surface, other.surface) > 0.1) { + //accepted_local_indices[accepted_count] = other_cache_index; + accepted_count += 1u; + //} } + } - if (DECOUPLED_SHADING) { - color_and_weight += other.weight_sum * vec4(neighbor.contribution_weight * other.radiance, 1.0); - } - if (other.weight_sum <= 0.0) { - bump_reservoir(&reservoir, other.history); - } else { - merge_reservoir(&reservoir, other, random_gen(rng)); - } + // 5: evaluate the MIS of each of the samples versus the canonical one. + let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + var state = ResampleState(); + state.mis_canonical = BASE_CANONICAL_MIS; + for (var lid = 0u; lid < accepted_count; lid += 1u) { + let other_local_index = accepted_local_indices[lid]; + //let other = pixel_cache[other_local_index]; + //let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, other_local_index == local_index); + //resample(base, &state, other, max_history, rng, enable_debug); } - // Finally, merge in the canonical sample + // 6: merge in the canonical sample. if (PAIRWISE_MIS) { - canonical.weight_sum *= mis_canonical / canonical.history; + canonical.weight_sum *= state.mis_canonical / canonical.history; } if (DECOUPLED_SHADING) { //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + let cw = canonical.weight_sum / max(canonical.selected_target_score * state.mis_canonical, 0.1); + state.color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); } + //TODO: https://github.com/gfx-rs/wgpu/issues/6131 + var reservoir = state.reservoir; merge_reservoir(&reservoir, canonical, random_gen(rng)); + // 7: finish let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS); let stored = pack_reservoir_detail(reservoir, effective_history); reservoirs[pixel_index] = stored; var ro = RestirOutput(); if (DECOUPLED_SHADING) { - ro.radiance = color_and_weight.xyz / max(color_and_weight.w, 0.001); + ro.radiance = state.color_and_weight.xyz / max(state.color_and_weight.w, 0.001); } else { - ro.radiance = stored.contribution_weight * reservoir.radiance; + ro.radiance = stored.contribution_weight * state.reservoir.radiance; } return ro; } -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { +@compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_index) local_index: u32, +) { if (any(global_id.xy >= camera.target_size)) { return; } @@ -512,7 +542,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3) { let surface = read_surface(vec2(global_id.xy)); let enable_debug = all(global_id.xy == debug.mouse_pos); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(global_id.xy), &rng, enable_restir_debug); + let ro = compute_restir(surface, vec2(global_id.xy), &rng, local_index, enable_restir_debug); let color = ro.radiance; if (enable_debug) { debug_buf.variance.color_sum += color; From aa31733d255f717856b54a3e7ffc32ab28cd0955 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 19 Aug 2024 23:57:26 -0700 Subject: [PATCH 02/12] Work around workgroup memory addressing GPU hang --- blade-render/code/ray-trace.wgsl | 22 ++++++++++++---------- blade-render/src/render/mod.rs | 10 ++++++++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 1a5b1736..56ebcd90 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -23,7 +23,8 @@ const BASE_CANONICAL_MIS: f32 = 0.05; // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; -const GROUP_SIZE: vec2 = vec2(8, 8); +//TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 +const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; struct MainParams { @@ -356,7 +357,7 @@ struct ResampleState { color_and_weight: vec4, } -/*fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { +fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { var live: LiveReservoir; let neighbor = other.reservoir; if (PAIRWISE_MIS) { @@ -399,12 +400,13 @@ struct ResampleState { if (DECOUPLED_SHADING) { (*state).color_and_weight += live.weight_sum * vec4(neighbor.contribution_weight * live.radiance, 1.0); } + /* if (live.weight_sum <= 0.0) { bump_reservoir(&(*state).reservoir, live.history); } else { merge_reservoir(&(*state).reservoir, live, random_gen(rng)); - } -}*/ + }*/ +} struct RestirOutput { radiance: vec3, @@ -483,10 +485,10 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0.0) { // if the surfaces are too different, there is no trust in this sample - //if (compare_surfaces(surface, other.surface) > 0.1) { - //accepted_local_indices[accepted_count] = other_cache_index; + if (compare_surfaces(surface, other.surface) > 0.1) { + accepted_local_indices[accepted_count] = other_cache_index; accepted_count += 1u; - //} + } } } @@ -496,9 +498,9 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(); shader.check_struct_size::(); let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { + let pipeline = gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { name: "ray-trace", data_layouts: &[&layout], compute: shader.at("main"), - }) + }); + + let pl_struct_size = shader.get_struct_size("PixelCache"); + let group_size = pipeline.get_workgroup_size(); + let wg_required = pl_struct_size * group_size[0] * group_size[1]; + log::info!("Using {} workgroup memory for RT", wg_required); + pipeline } fn create_temporal_accum( From 32fd30cc172393e356f85f3be2641a22ad04e9a7 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 21 Aug 2024 21:47:48 -0700 Subject: [PATCH 03/12] Hook up the resampling --- blade-render/code/ray-trace.wgsl | 69 +++++++++++++++++--------------- examples/scene/main.rs | 2 + 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 56ebcd90..fbf65a58 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -351,15 +351,16 @@ struct ResampleBase { world_pos: vec3, accepted_count: f32, } -struct ResampleState { - reservoir: LiveReservoir, +struct ResampleResult { + selected: bool, mis_canonical: f32, color_and_weight: vec4, } -fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { - var live: LiveReservoir; +fn resample(dst: ptr, base: ResampleBase, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) -> ResampleResult { + var src: LiveReservoir; let neighbor = other.reservoir; + var rr = ResampleResult(); if (PAIRWISE_MIS) { let debug_len = select(0.0, other.surface.depth * 0.2, enable_debug); let canonical = base.canonical; @@ -370,7 +371,7 @@ fn resample(base: ResampleBase, state: ptr, other: Pixe let mis_sub_canonical = balance_heuristic( t_canonical_at_neighbor.score, canonical.selected_target_score, neighbor_history * base.accepted_count, canonical.history); - (*state).mis_canonical += 1.0 - mis_sub_canonical.weight; + rr.mis_canonical = 1.0 - mis_sub_canonical.weight; } // Notes about t_neighbor_at_neighbor: @@ -384,28 +385,29 @@ fn resample(base: ResampleBase, state: ptr, other: Pixe neighbor.target_score, t_neighbor_at_canonical.score, neighbor_history * base.accepted_count, canonical.history); - live.history = neighbor_history; - live.selected_light_index = neighbor.light_index; - live.selected_uv = neighbor.light_uv; - live.selected_target_score = t_neighbor_at_canonical.score; - live.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; + src.history = neighbor_history; + src.selected_light_index = neighbor.light_index; + src.selected_uv = neighbor.light_uv; + src.selected_target_score = t_neighbor_at_canonical.score; + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; //Note: should be needed according to the paper - // live.history *= min(mis_neighbor.history, mis_sub_canonical.history); - live.radiance = t_neighbor_at_canonical.color; + // src.history *= min(mis_neighbor.history, mis_sub_canonical.history); + src.radiance = t_neighbor_at_canonical.color; } else { - live = unpack_reservoir(neighbor, max_history); - live.radiance = evaluate_reflected_light(base.surface, live.selected_light_index, live.selected_uv); + src = unpack_reservoir(neighbor, max_history); + src.radiance = evaluate_reflected_light(base.surface, src.selected_light_index, src.selected_uv); } if (DECOUPLED_SHADING) { - (*state).color_and_weight += live.weight_sum * vec4(neighbor.contribution_weight * live.radiance, 1.0); + rr.color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } - /* - if (live.weight_sum <= 0.0) { - bump_reservoir(&(*state).reservoir, live.history); + if (src.weight_sum <= 0.0) { + bump_reservoir(dst, src.history); } else { - merge_reservoir(&(*state).reservoir, live, random_gen(rng)); - }*/ + merge_reservoir(dst, src, random_gen(rng)); + rr.selected = true; + } + return rr; } struct RestirOutput { @@ -477,11 +479,11 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { - let other_cache_index = random_u32(rng) % (GROUP_SIZE.x * GROUP_SIZE.y); + let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; let other = pixel_cache[other_cache_index]; if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample @@ -494,26 +496,29 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(0.0); for (var lid = 0u; lid < accepted_count; lid += 1u) { let other_local_index = accepted_local_indices[lid]; let other = pixel_cache[other_local_index]; let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, other_local_index == local_index); - resample(base, &state, other, max_history, rng, enable_debug); + let rr = resample(&reservoir, base, other, max_history, rng, enable_debug); + mis_canonical += rr.mis_canonical; + if (DECOUPLED_SHADING) { + color_and_weight += rr.color_and_weight; + } } // 6: merge in the canonical sample. if (PAIRWISE_MIS) { - canonical.weight_sum *= state.mis_canonical / canonical.history; + canonical.weight_sum *= mis_canonical / canonical.history; } if (DECOUPLED_SHADING) { //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * state.mis_canonical, 0.1); - state.color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); + color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); } - //TODO: https://github.com/gfx-rs/wgpu/issues/6131 - var reservoir = state.reservoir; merge_reservoir(&reservoir, canonical, random_gen(rng)); // 7: finish @@ -522,9 +527,9 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr Date: Wed, 21 Aug 2024 22:17:49 -0700 Subject: [PATCH 04/12] Reject spatial samples that are too close --- blade-render/code/ray-trace.wgsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index fbf65a58..3413ea63 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -12,6 +12,7 @@ const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; +const MIN_RESAMPLE_DISTANCE: i32 = 3; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; @@ -229,6 +230,10 @@ fn read_prev_surface(pixel: vec2) -> Surface { return surface; } +fn index_to_coord(index: u32) -> vec2 { + return vec2(vec2(index % GROUP_SIZE.x, index / GROUP_SIZE.x)); +} + fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { let lambert_brdf = 1.0 / PI; let lambert_term = qrot(qinv(surface.basis), dir).z; @@ -418,7 +423,6 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(surface.depth / camera.depth)); } - pixel_cache[local_index] = PixelCache(); let ray_dir = get_ray_direction(camera, pixel); let pixel_index = get_reservoir_index(pixel, camera); if (surface.depth == 0.0) { @@ -474,16 +478,24 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; + let diff = index_to_coord(other_cache_index) - local_xy; + if (dot(diff, diff) < MIN_RESAMPLE_DISTANCE * MIN_RESAMPLE_DISTANCE) { + continue; + } let other = pixel_cache[other_cache_index]; if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample From 6eb0bbe8b4965c6d1cd3a9917a2aacef450120ca Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 21 Aug 2024 23:52:20 -0700 Subject: [PATCH 05/12] Jitter the ray compute grid --- Cargo.toml | 4 ++-- blade-helpers/src/hud.rs | 5 +++-- blade-render/Cargo.toml | 1 + blade-render/code/ray-trace.wgsl | 26 ++++++++++++------------ blade-render/src/render/mod.rs | 35 ++++++++++++++++++++++++++++---- examples/scene/main.rs | 3 ++- src/lib.rs | 3 ++- 7 files changed, 54 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 03c104ca..681d4619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ gltf = { version = "1.1", default-features = false } log = "0.4" mint = "0.5" naga = { version = "22", features = ["wgsl-in"] } +nanorand = { version = "0.7", default-features = false } profiling = "1" slab = "0.4" strum = { version = "0.25", features = ["derive"] } @@ -79,7 +80,7 @@ glam = { workspace = true } log = { workspace = true } mint = { workspace = true, features = ["serde"] } naga = { workspace = true } -nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } +nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } ron = "0.8" serde = { version = "1", features = ["serde_derive"] } @@ -95,7 +96,6 @@ egui-winit = "0.28" console_error_panic_hook = "0.1.7" console_log = "1" web-sys = { workspace = true, features = ["Window"] } -getrandom = { version = "0.2", features = ["js"] } [target.'cfg(any(target_os = "windows", target_os = "linux"))'.dev-dependencies] renderdoc = "0.12" diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index a135a337..e8d0fea8 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -22,9 +22,10 @@ impl ExposeHud for blade_render::RayConfig { egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) .text("Spatial tap history"), ); + ui.checkbox(&mut self.spatial_jitter, "Spatial jittering"); ui.add( - egui::widgets::Slider::new(&mut self.spatial_radius, 1..=50) - .text("Spatial radius (px)"), + egui::widgets::Slider::new(&mut self.spatial_min_distance, 1..=10) + .text("Spatial minimum distance (px)"), ); ui.add( egui::widgets::Slider::new(&mut self.t_start, 0.001..=0.5) diff --git a/blade-render/Cargo.toml b/blade-render/Cargo.toml index 85a67a8f..002166de 100644 --- a/blade-render/Cargo.toml +++ b/blade-render/Cargo.toml @@ -39,6 +39,7 @@ glam = { workspace = true } log = { workspace = true } mikktspace = { package = "bevy_mikktspace", version = "0.12", optional = true } mint = { workspace = true } +nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } slab = { workspace = true, optional = true } strum = { workspace = true } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 3413ea63..83acd7ab 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -12,7 +12,6 @@ const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; -const MIN_RESAMPLE_DISTANCE: i32 = 3; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; @@ -36,9 +35,10 @@ struct MainParams { temporal_history: u32, spatial_taps: u32, spatial_tap_history: u32, - spatial_radius: i32, + spatial_min_distance: i32, t_start: f32, use_motion_vectors: u32, + grid_offset: vec2, } var camera: CameraParams; @@ -231,7 +231,7 @@ fn read_prev_surface(pixel: vec2) -> Surface { } fn index_to_coord(index: u32) -> vec2 { - return vec2(vec2(index % GROUP_SIZE.x, index / GROUP_SIZE.x)); + return vec2((vec2(index, index / GROUP_SIZE.x) + GROUP_SIZE - parameters.grid_offset) % GROUP_SIZE); } fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { @@ -478,8 +478,6 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; let diff = index_to_coord(other_cache_index) - local_xy; - if (dot(diff, diff) < MIN_RESAMPLE_DISTANCE * MIN_RESAMPLE_DISTANCE) { + if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; } let other = pixel_cache[other_cache_index]; - if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { + if (other.reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample if (compare_surfaces(surface, other.surface) > 0.1) { accepted_local_indices[accepted_count] = other_cache_index; @@ -551,22 +549,24 @@ fn main( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_index) local_index: u32, ) { - if (any(global_id.xy >= camera.target_size)) { + pixel_cache[local_index].reservoir.confidence = 0.0; + let pixel_coord = global_id.xy - parameters.grid_offset; + if (any(pixel_coord >= camera.target_size)) { return; } - let global_index = global_id.y * camera.target_size.x + global_id.x; + let global_index = pixel_coord.y * camera.target_size.x + pixel_coord.x; var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(vec2(global_id.xy)); - let enable_debug = all(global_id.xy == debug.mouse_pos); + let surface = read_surface(vec2(pixel_coord)); + let enable_debug = all(pixel_coord == debug.mouse_pos); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(global_id.xy), &rng, local_index, enable_restir_debug); + let ro = compute_restir(surface, vec2(pixel_coord), &rng, local_index, enable_restir_debug); let color = ro.radiance; if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; debug_buf.variance.count += 1u; } - textureStore(out_diffuse, global_id.xy, vec4(color, 1.0)); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); } diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index dc89c622..22347104 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -93,7 +93,11 @@ pub struct RayConfig { pub temporal_history: u32, pub spatial_taps: u32, pub spatial_tap_history: u32, - pub spatial_radius: u32, + /// Minimal distance to a spatially reused pixel (in the current frame). + pub spatial_min_distance: u32, + /// Enable jittering of the compute grid, to allow spatial samples to mix + /// outside of the original workgroup pixel bounds. + pub spatial_jitter: bool, pub t_start: f32, } @@ -322,6 +326,7 @@ pub struct Renderer { textures: blade_graphics::TextureArray, samplers: Samplers, reservoir_size: u32, + grid_jitter: [u32; 2], debug: DebugRender, surface_size: blade_graphics::Extent, surface_info: blade_graphics::SurfaceInfo, @@ -331,6 +336,7 @@ pub struct Renderer { // This way we can embed user info into the allocator. texture_resource_lookup: HashMap>, + random: nanorand::WyRand, } #[repr(C)] @@ -363,9 +369,10 @@ struct MainParams { temporal_history: u32, spatial_taps: u32, spatial_tap_history: u32, - spatial_radius: u32, + spatial_min_distance: u32, t_start: f32, use_motion_vectors: u32, + grid_offset: [u32; 2], } #[derive(blade_macros::ShaderData)] @@ -720,12 +727,14 @@ impl Renderer { textures: blade_graphics::TextureArray::new(), samplers, reservoir_size: sp.reservoir_size, + grid_jitter: [0; 2], debug, surface_size: config.surface_size, surface_info: config.surface_info, frame_index: 0, frame_scene_built: 0, texture_resource_lookup: HashMap::default(), + random: nanorand::WyRand::new(), } } @@ -1109,6 +1118,12 @@ impl Renderer { } self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); self.post_proc_input_index = self.frame_index % 2; + + self.grid_jitter = { + let wg_size = self.main_pipeline.get_workgroup_size(); + let random = nanorand::Rng::generate::(&mut self.random) as u32; + [random % wg_size[0], (random / wg_size[0]) % wg_size[1]] + }; } /// Ray trace the scene. @@ -1152,8 +1167,19 @@ impl Renderer { } if let mut pass = command_encoder.compute() { + let grid_offset = if ray_config.spatial_jitter { + self.grid_jitter + } else { + [0; 2] + }; + let groups = { + let mut grid_size = self.surface_size; + grid_size.width += grid_offset[0]; + grid_size.height += grid_offset[1]; + self.main_pipeline.get_dispatch_for(grid_size) + }; + let mut pc = pass.with(&self.main_pipeline); - let groups = self.main_pipeline.get_dispatch_for(self.surface_size); pc.bind( 0, &MainData { @@ -1169,9 +1195,10 @@ impl Renderer { temporal_history: ray_config.temporal_history, spatial_taps: ray_config.spatial_taps, spatial_tap_history: ray_config.spatial_tap_history, - spatial_radius: ray_config.spatial_radius, + spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, + grid_offset, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index diff --git a/examples/scene/main.rs b/examples/scene/main.rs index b31545c0..632e9f95 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -264,7 +264,8 @@ impl Example { temporal_history: 10, spatial_taps: 1, spatial_tap_history: 5, - spatial_radius: 10, + spatial_min_distance: 4, + spatial_jitter: true, t_start: 0.1, }, denoiser_enabled: true, diff --git a/src/lib.rs b/src/lib.rs index 5c8320b6..4c471825 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -489,7 +489,8 @@ impl Engine { temporal_history: 10, spatial_taps: 1, spatial_tap_history: 5, - spatial_radius: 10, + spatial_min_distance: 4, + spatial_jitter: true, t_start: 0.01, }, denoiser_enabled: true, From 41e9b6fa52b24c2d6356cf08a79f317be7c42c23 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 22 Aug 2024 22:58:12 -0700 Subject: [PATCH 06/12] Scaling jittering --- blade-render/code/ray-trace.wgsl | 41 +++++++++++++------------ blade-render/src/render/mod.rs | 52 ++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 83acd7ab..db06b5e4 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -39,6 +39,7 @@ struct MainParams { t_start: f32, use_motion_vectors: u32, grid_offset: vec2, + grid_scale: vec2, } var camera: CameraParams; @@ -96,12 +97,6 @@ fn get_reservoir_index(pixel: vec2, camera: CameraParams) -> i32 { } } -fn get_pixel_from_reservoir_index(index: i32, camera: CameraParams) -> vec2 { - let y = index / i32(camera.target_size.x); - let x = index - y * i32(camera.target_size.x); - return vec2(x, y); -} - fn bump_reservoir(r: ptr, history: f32) { (*r).history += history; } @@ -156,7 +151,8 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { var t_depth: texture_2d; var t_prev_depth: texture_2d; var t_basis: texture_2d; -var t_prev_basis: texture_2d; +var t_prev_basis: texture_2d +; var t_flat_normal: texture_2d; var t_prev_flat_normal: texture_2d; var t_motion: texture_2d; @@ -230,8 +226,13 @@ fn read_prev_surface(pixel: vec2) -> Surface { return surface; } -fn index_to_coord(index: u32) -> vec2 { - return vec2((vec2(index, index / GROUP_SIZE.x) + GROUP_SIZE - parameters.grid_offset) % GROUP_SIZE); +fn thread_index_to_coord(thread_index: u32, group_id: vec3) -> vec2 { + let cluster_id = group_id.xy / parameters.grid_scale; + let cluster_offset = group_id.xy - cluster_id * parameters.grid_scale; + let local_id = vec2(thread_index % GROUP_SIZE.x, thread_index / GROUP_SIZE.x); + let global_id = (cluster_id * GROUP_SIZE + local_id) * parameters.grid_scale + cluster_offset; + //TODO: also use the offset + return vec2(global_id); } fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { @@ -419,7 +420,10 @@ struct RestirOutput { radiance: vec3, } -fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, local_index: u32, enable_debug: bool) -> RestirOutput { +fn compute_restir( + surface: Surface, pixel: vec2, rng: ptr, + local_index: u32, group_id: vec3, enable_debug: bool, +) -> RestirOutput { if (debug.view_mode == DebugMode_Depth) { textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } @@ -487,10 +491,9 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; - let diff = index_to_coord(other_cache_index) - local_xy; + let diff = thread_index_to_coord(other_cache_index, group_id) - pixel; if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; } @@ -546,22 +549,22 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, + @builtin(workgroup_id) group_id: vec3, @builtin(local_invocation_index) local_index: u32, ) { pixel_cache[local_index].reservoir.confidence = 0.0; - let pixel_coord = global_id.xy - parameters.grid_offset; - if (any(pixel_coord >= camera.target_size)) { + let pixel_coord = thread_index_to_coord(local_index, group_id); + if (any(vec2(pixel_coord) >= camera.target_size)) { return; } - let global_index = pixel_coord.y * camera.target_size.x + pixel_coord.x; + let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(vec2(pixel_coord)); - let enable_debug = all(pixel_coord == debug.mouse_pos); + let surface = read_surface(pixel_coord); + let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(pixel_coord), &rng, local_index, enable_restir_debug); + let ro = compute_restir(surface, pixel_coord, &rng, local_index, group_id, enable_restir_debug); let color = ro.radiance; if (enable_debug) { debug_buf.variance.color_sum += color; diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 22347104..d5996c87 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -11,6 +11,7 @@ pub use env_map::EnvironmentMap; use std::{collections::HashMap, mem, num::NonZeroU32, path::Path, ptr}; const MAX_RESOURCES: u32 = 8192; +const GRID_SCALES: u32 = 4; const RADIANCE_FORMAT: blade_graphics::TextureFormat = blade_graphics::TextureFormat::Rgba16Float; fn mat4_transform(t: &blade_graphics::Transform) -> glam::Mat4 { @@ -299,6 +300,20 @@ struct Blur { atrous_pipeline: blade_graphics::ComputePipeline, } +#[derive(Clone, Copy)] +struct GridMapping { + offset: mint::Vector2, + scale: mint::Vector2, +} +impl Default for GridMapping { + fn default() -> Self { + Self { + offset: [0; 2].into(), + scale: [1; 2].into(), + } + } +} + /// Blade Renderer is a comprehensive rendering solution for /// end user applications. /// @@ -326,7 +341,7 @@ pub struct Renderer { textures: blade_graphics::TextureArray, samplers: Samplers, reservoir_size: u32, - grid_jitter: [u32; 2], + grid_mapping: GridMapping, debug: DebugRender, surface_size: blade_graphics::Extent, surface_info: blade_graphics::SurfaceInfo, @@ -373,6 +388,7 @@ struct MainParams { t_start: f32, use_motion_vectors: u32, grid_offset: [u32; 2], + grid_scale: [u32; 2], } #[derive(blade_macros::ShaderData)] @@ -727,7 +743,7 @@ impl Renderer { textures: blade_graphics::TextureArray::new(), samplers, reservoir_size: sp.reservoir_size, - grid_jitter: [0; 2], + grid_mapping: GridMapping::default(), debug, surface_size: config.surface_size, surface_info: config.surface_info, @@ -1119,10 +1135,19 @@ impl Renderer { self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); self.post_proc_input_index = self.frame_index % 2; - self.grid_jitter = { + self.grid_mapping = { let wg_size = self.main_pipeline.get_workgroup_size(); - let random = nanorand::Rng::generate::(&mut self.random) as u32; - [random % wg_size[0], (random / wg_size[0]) % wg_size[1]] + let random = nanorand::Rng::generate::(&mut self.random); + let r_offset = random as u32; + let r_scale = (random >> 32) as u32; + GridMapping { + offset: [r_offset % wg_size[0], (r_offset / wg_size[0]) % wg_size[1]].into(), + scale: [ + 1 << (r_scale % GRID_SCALES), + 1 << ((r_scale / GRID_SCALES) % GRID_SCALES), + ] + .into(), + } }; } @@ -1167,18 +1192,12 @@ impl Renderer { } if let mut pass = command_encoder.compute() { - let grid_offset = if ray_config.spatial_jitter { - self.grid_jitter + let grid_mapping = if ray_config.spatial_jitter { + self.grid_mapping } else { - [0; 2] + GridMapping::default() }; - let groups = { - let mut grid_size = self.surface_size; - grid_size.width += grid_offset[0]; - grid_size.height += grid_offset[1]; - self.main_pipeline.get_dispatch_for(grid_size) - }; - + let groups = self.main_pipeline.get_dispatch_for(self.surface_size); let mut pc = pass.with(&self.main_pipeline); pc.bind( 0, @@ -1198,7 +1217,8 @@ impl Renderer { spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - grid_offset, + grid_offset: grid_mapping.offset.into(), + grid_scale: grid_mapping.scale.into(), }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index From 8fd67a626a816cc57bbc2b53dc76f4b550dd2cd4 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 22 Aug 2024 23:33:57 -0700 Subject: [PATCH 07/12] Refine group mixing and visualization --- blade-helpers/src/hud.rs | 2 +- blade-render/Cargo.toml | 1 - blade-render/code/color.inc.wgsl | 19 ++++++++++++ blade-render/code/ray-trace.wgsl | 12 ++++++-- blade-render/src/render/mod.rs | 51 +++++--------------------------- examples/scene/main.rs | 2 +- src/lib.rs | 2 +- 7 files changed, 40 insertions(+), 49 deletions(-) create mode 100644 blade-render/code/color.inc.wgsl diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index e8d0fea8..ff4cf5a5 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -22,7 +22,7 @@ impl ExposeHud for blade_render::RayConfig { egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) .text("Spatial tap history"), ); - ui.checkbox(&mut self.spatial_jitter, "Spatial jittering"); + ui.add(egui::widgets::Slider::new(&mut self.group_mixer, 1..=10).text("Group mixer")); ui.add( egui::widgets::Slider::new(&mut self.spatial_min_distance, 1..=10) .text("Spatial minimum distance (px)"), diff --git a/blade-render/Cargo.toml b/blade-render/Cargo.toml index 002166de..85a67a8f 100644 --- a/blade-render/Cargo.toml +++ b/blade-render/Cargo.toml @@ -39,7 +39,6 @@ glam = { workspace = true } log = { workspace = true } mikktspace = { package = "bevy_mikktspace", version = "0.12", optional = true } mint = { workspace = true } -nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } slab = { workspace = true, optional = true } strum = { workspace = true } diff --git a/blade-render/code/color.inc.wgsl b/blade-render/code/color.inc.wgsl new file mode 100644 index 00000000..8e84cfda --- /dev/null +++ b/blade-render/code/color.inc.wgsl @@ -0,0 +1,19 @@ +fn hsv_to_rgb(h: f32, s: f32, v: f32) -> vec3 { + let c = v * s; + let x = c * (1.0 - abs((h / 60.0) % 2.0 - 1.0)); + var q = vec3(v - c); + if (h < 60.0) { + q.r += c; q.g += x; + } else if (h < 120.0) { + q.g += c; q.r += x; + } else if (h < 180.0) { + q.g += c; q.b += x; + } else if (h < 240.0) { + q.b += c; q.g += x; + } else if (h < 300.0) { + q.b += c; q.r += x; + } else { + q.r += c; q.b += x; + } + return q; +} diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index db06b5e4..416a1acb 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -1,3 +1,4 @@ +#include "color.inc.wgsl" #include "quaternion.inc.wgsl" #include "random.inc.wgsl" #include "env-importance.inc.wgsl" @@ -26,6 +27,7 @@ const DECOUPLED_SHADING: bool = false; //TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; +const GROUP_VISUALIZE: bool = false; struct MainParams { frame_index: u32, @@ -38,7 +40,6 @@ struct MainParams { spatial_min_distance: i32, t_start: f32, use_motion_vectors: u32, - grid_offset: vec2, grid_scale: vec2, } @@ -231,7 +232,6 @@ fn thread_index_to_coord(thread_index: u32, group_id: vec3) -> vec2 { let cluster_offset = group_id.xy - cluster_id * parameters.grid_scale; let local_id = vec2(thread_index % GROUP_SIZE.x, thread_index / GROUP_SIZE.x); let global_id = (cluster_id * GROUP_SIZE + local_id) * parameters.grid_scale + cluster_offset; - //TODO: also use the offset return vec2(global_id); } @@ -557,6 +557,14 @@ fn main( if (any(vec2(pixel_coord) >= camera.target_size)) { return; } + if (GROUP_VISUALIZE) + { + var rng = random_init(group_id.y * 1000u + group_id.x, 0u); + let h = random_gen(&rng) * 360.0; + let color = hsv_to_rgb(h, 0.5, 1.0); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); + return; + } let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index d5996c87..1eec948f 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -11,7 +11,6 @@ pub use env_map::EnvironmentMap; use std::{collections::HashMap, mem, num::NonZeroU32, path::Path, ptr}; const MAX_RESOURCES: u32 = 8192; -const GRID_SCALES: u32 = 4; const RADIANCE_FORMAT: blade_graphics::TextureFormat = blade_graphics::TextureFormat::Rgba16Float; fn mat4_transform(t: &blade_graphics::Transform) -> glam::Mat4 { @@ -96,9 +95,9 @@ pub struct RayConfig { pub spatial_tap_history: u32, /// Minimal distance to a spatially reused pixel (in the current frame). pub spatial_min_distance: u32, - /// Enable jittering of the compute grid, to allow spatial samples to mix + /// Scale and mix the groups into clusters, to allow spatial samples to mix /// outside of the original workgroup pixel bounds. - pub spatial_jitter: bool, + pub group_mixer: u32, pub t_start: f32, } @@ -300,20 +299,6 @@ struct Blur { atrous_pipeline: blade_graphics::ComputePipeline, } -#[derive(Clone, Copy)] -struct GridMapping { - offset: mint::Vector2, - scale: mint::Vector2, -} -impl Default for GridMapping { - fn default() -> Self { - Self { - offset: [0; 2].into(), - scale: [1; 2].into(), - } - } -} - /// Blade Renderer is a comprehensive rendering solution for /// end user applications. /// @@ -341,7 +326,6 @@ pub struct Renderer { textures: blade_graphics::TextureArray, samplers: Samplers, reservoir_size: u32, - grid_mapping: GridMapping, debug: DebugRender, surface_size: blade_graphics::Extent, surface_info: blade_graphics::SurfaceInfo, @@ -351,7 +335,6 @@ pub struct Renderer { // This way we can embed user info into the allocator. texture_resource_lookup: HashMap>, - random: nanorand::WyRand, } #[repr(C)] @@ -387,7 +370,6 @@ struct MainParams { spatial_min_distance: u32, t_start: f32, use_motion_vectors: u32, - grid_offset: [u32; 2], grid_scale: [u32; 2], } @@ -743,14 +725,12 @@ impl Renderer { textures: blade_graphics::TextureArray::new(), samplers, reservoir_size: sp.reservoir_size, - grid_mapping: GridMapping::default(), debug, surface_size: config.surface_size, surface_info: config.surface_info, frame_index: 0, frame_scene_built: 0, texture_resource_lookup: HashMap::default(), - random: nanorand::WyRand::new(), } } @@ -1134,21 +1114,6 @@ impl Renderer { } self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); self.post_proc_input_index = self.frame_index % 2; - - self.grid_mapping = { - let wg_size = self.main_pipeline.get_workgroup_size(); - let random = nanorand::Rng::generate::(&mut self.random); - let r_offset = random as u32; - let r_scale = (random >> 32) as u32; - GridMapping { - offset: [r_offset % wg_size[0], (r_offset / wg_size[0]) % wg_size[1]].into(), - scale: [ - 1 << (r_scale % GRID_SCALES), - 1 << ((r_scale / GRID_SCALES) % GRID_SCALES), - ] - .into(), - } - }; } /// Ray trace the scene. @@ -1192,11 +1157,12 @@ impl Renderer { } if let mut pass = command_encoder.compute() { - let grid_mapping = if ray_config.spatial_jitter { - self.grid_mapping - } else { - GridMapping::default() + let grid_scale = { + let limit = ray_config.group_mixer; + let r = self.frame_index as u32 ^ 0x5A; + [r % limit + 1, (r / limit) % limit + 1] }; + let groups = self.main_pipeline.get_dispatch_for(self.surface_size); let mut pc = pass.with(&self.main_pipeline); pc.bind( @@ -1217,8 +1183,7 @@ impl Renderer { spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - grid_offset: grid_mapping.offset.into(), - grid_scale: grid_mapping.scale.into(), + grid_scale, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 632e9f95..903a30ca 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -265,7 +265,7 @@ impl Example { spatial_taps: 1, spatial_tap_history: 5, spatial_min_distance: 4, - spatial_jitter: true, + group_mixer: 10, t_start: 0.1, }, denoiser_enabled: true, diff --git a/src/lib.rs b/src/lib.rs index 4c471825..b14b8ebe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -490,7 +490,7 @@ impl Engine { spatial_taps: 1, spatial_tap_history: 5, spatial_min_distance: 4, - spatial_jitter: true, + group_mixer: 10, t_start: 0.01, }, denoiser_enabled: true, From e625b50bd8d509fa090fcd0041b2c48902d0c227 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 24 Aug 2024 00:52:38 -0700 Subject: [PATCH 08/12] True separation between temporal and spatial resampling --- blade-render/code/ray-trace.wgsl | 209 ++++++++++++++++++------------- 1 file changed, 123 insertions(+), 86 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 416a1acb..d614a167 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -360,10 +360,13 @@ struct ResampleBase { struct ResampleResult { selected: bool, mis_canonical: f32, - color_and_weight: vec4, } -fn resample(dst: ptr, base: ResampleBase, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) -> ResampleResult { +fn resample( + dst: ptr, color_and_weight: ptr>, + base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, + rng: ptr, enable_debug: bool, +) -> ResampleResult { var src: LiveReservoir; let neighbor = other.reservoir; var rr = ResampleResult(); @@ -373,7 +376,7 @@ fn resample(dst: ptr, base: ResampleBase, other: PixelC let neighbor_history = min(neighbor.confidence, f32(max_history)); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); let mis_sub_canonical = balance_heuristic( t_canonical_at_neighbor.score, canonical.selected_target_score, neighbor_history * base.accepted_count, canonical.history); @@ -405,7 +408,7 @@ fn resample(dst: ptr, base: ResampleBase, other: PixelC } if (DECOUPLED_SHADING) { - rr.color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); + *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } if (src.weight_sum <= 0.0) { bump_reservoir(dst, src.history); @@ -416,33 +419,51 @@ fn resample(dst: ptr, base: ResampleBase, other: PixelC return rr; } -struct RestirOutput { - radiance: vec3, +struct ResampleOutput { + reservoir: StoredReservoir, + color: vec3, } -fn compute_restir( - surface: Surface, pixel: vec2, rng: ptr, - local_index: u32, group_id: vec3, enable_debug: bool, -) -> RestirOutput { - if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); +fn finalize_resampling( + reservoir: ptr, color_and_weight: ptr>, + base: ResampleBase, mis_canonical: f32, rng: ptr, +) -> ResampleOutput { + var ro = ResampleOutput(); + var canonical = base.canonical; + if (PAIRWISE_MIS && canonical.history > 0.0) { + //TODO: fix the case of `mis_canonical` being too low + canonical.weight_sum *= mis_canonical / canonical.history; } - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - if (surface.depth == 0.0) { - reservoirs[pixel_index] = StoredReservoir(); - let env = evaluate_environment(ray_dir); - return RestirOutput(env); + merge_reservoir(reservoir, canonical, random_gen(rng)); + + if (base.accepted_count > 0.0) { + let effective_history = select((*reservoir).history, BASE_CANONICAL_MIS + base.accepted_count, PAIRWISE_MIS); + ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); + } else { + ro.reservoir = pack_reservoir(canonical); } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - let position = camera.position + surface.depth * ray_dir; - let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); - if (debug.view_mode == DebugMode_Normal) { - textureStore(out_debug, pixel, vec4(normal, 0.0)); + if (DECOUPLED_SHADING) { + //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? + let contribution_weight = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); + *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + ro.color = (*color_and_weight).xyz / max((*color_and_weight).w, 0.001); + } else { + ro.color = ro.reservoir.contribution_weight * (*reservoir).radiance; } + return ro; +} - // 1: build the canonical sample +fn resample_temporal( + surface: Surface, cur_pixel: vec2, position: vec3, + rng: ptr, enable_debug: bool +) -> ResampleOutput { + if (surface.depth == 0.0) { + return ResampleOutput(); + } + let debug_len = select(0.0, surface.depth * 0.2, enable_debug); + + // build the canonical sample var canonical = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; @@ -462,89 +483,107 @@ fn compute_restir( } //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(pixel, position)); - var accepted_count = 0u; - var accepted_local_indices = array(); + let prev_pixel = vec2(get_prev_pixel(cur_pixel, position)); - // 2: read the temporal sample. let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); - if (prev_reservoir_index >= 0) { - let prev_reservoir = prev_reservoirs[prev_reservoir_index]; - let prev_surface = read_prev_surface(prev_pixel); - let prev_dir = get_ray_direction(prev_camera, prev_pixel); - let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; - pixel_cache[local_index] = PixelCache(prev_surface, prev_reservoir, prev_world_pos); - - if (parameters.temporal_tap != 0u && prev_reservoir.confidence > 0.0) { - // if the surfaces are too different, there is no trust in this sample - if (compare_surfaces(surface, prev_surface) > 0.1) { - accepted_local_indices[0] = local_index; - accepted_count = 1u; - } - } + if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { + return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); } - //TODO: store the reservoir from this iteration, not the previous one - // 3: sync with the workgroup to ensure all reservoirs are available. - workgroupBarrier(); + let prev_reservoir = prev_reservoirs[prev_reservoir_index]; + let prev_surface = read_prev_surface(prev_pixel); + // if the surfaces are too different, there is no trust in this sample + if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { + return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); + } + + var reservoir = LiveReservoir(); + var color_and_weight = vec4(0.0); + let base = ResampleBase(surface, canonical, position, 1.0); + + let prev_dir = get_ray_direction(prev_camera, prev_pixel); + let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; + let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, enable_debug); + let mis_canonical = BASE_CANONICAL_MIS + rr.mis_canonical; - // 4: gather the list of neighbors (within the workgroup) to resample. - let max_accepted = min(MAX_RESAMPLE, accepted_count + parameters.spatial_taps); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); +} + +fn resample_spatial( + surface: Surface, cur_pixel: vec2, position: vec3, + group_id: vec3, canonical_stored: StoredReservoir, + rng: ptr, enable_debug: bool +) -> ResampleOutput { + if (surface.depth == 0.0) { + let dir = normalize(position - camera.position); + var ro = ResampleOutput(); + ro.color = evaluate_environment(dir); + return ro; + } + let debug_len = select(0.0, surface.depth * 0.2, enable_debug); + + // gather the list of neighbors (within the workgroup) to resample. + var accepted_count = 0u; + var accepted_local_indices = array(); + let max_accepted = min(MAX_RESAMPLE, parameters.spatial_taps); let num_candidates = parameters.spatial_taps * 3u; - for (var candidates = num_candidates; candidates > 0u && accepted_count < max_accepted; candidates -= 1u) { + for (var i = 0u; i < num_candidates && accepted_count < max_accepted; i += 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; - let diff = thread_index_to_coord(other_cache_index, group_id) - pixel; + let diff = thread_index_to_coord(other_cache_index, group_id) - cur_pixel; if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; } let other = pixel_cache[other_cache_index]; - if (other.reservoir.confidence > 0.0) { - // if the surfaces are too different, there is no trust in this sample - if (compare_surfaces(surface, other.surface) > 0.1) { - accepted_local_indices[accepted_count] = other_cache_index; - accepted_count += 1u; - } + // if the surfaces are too different, there is no trust in this sample + if (other.reservoir.confidence > 0.0 && compare_surfaces(surface, other.surface) > 0.1) { + accepted_local_indices[accepted_count] = other_cache_index; + accepted_count += 1u; } } - // 5: evaluate the MIS of each of the samples versus the canonical one. - let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + let canonical = unpack_reservoir(canonical_stored, ~0u); var reservoir = LiveReservoir(); var mis_canonical = BASE_CANONICAL_MIS; var color_and_weight = vec4(0.0); + let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + + // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { - let other_local_index = accepted_local_indices[lid]; - let other = pixel_cache[other_local_index]; - let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, other_local_index == local_index); - let rr = resample(&reservoir, base, other, max_history, rng, enable_debug); + let other = pixel_cache[accepted_local_indices[lid]]; + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, enable_debug); mis_canonical += rr.mis_canonical; - if (DECOUPLED_SHADING) { - color_and_weight += rr.color_and_weight; - } } - // 6: merge in the canonical sample. - if (PAIRWISE_MIS) { - canonical.weight_sum *= mis_canonical / canonical.history; - } - if (DECOUPLED_SHADING) { - //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); +} + +fn compute_restir( + pixel: vec2, local_index: u32, group_id: vec3, + rng: ptr, enable_debug: bool, +) -> vec3 { + let surface = read_surface(pixel); + if (debug.view_mode == DebugMode_Depth) { + textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } - merge_reservoir(&reservoir, canonical, random_gen(rng)); + let ray_dir = get_ray_direction(camera, pixel); + let pixel_index = get_reservoir_index(pixel, camera); - // 7: finish - let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS); - let stored = pack_reservoir_detail(reservoir, effective_history); - reservoirs[pixel_index] = stored; - var ro = RestirOutput(); - if (DECOUPLED_SHADING) { - ro.radiance = color_and_weight.xyz / max(color_and_weight.w, 0.001); - } else { - ro.radiance = stored.contribution_weight * reservoir.radiance; + let position = camera.position + surface.depth * ray_dir; + if (debug.view_mode == DebugMode_Normal) { + let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); + textureStore(out_debug, pixel, vec4(normal, 0.0)); } - return ro; + + let temporal = resample_temporal(surface, pixel, position, rng, enable_debug); + pixel_cache[local_index] = PixelCache(surface, temporal.reservoir, position); + + // sync with the workgroup to ensure all reservoirs are available. + workgroupBarrier(); + + let spatial = resample_spatial(surface, pixel, position, group_id, temporal.reservoir, rng, enable_debug); + reservoirs[pixel_index] = spatial.reservoir; + return spatial.color; } @compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) @@ -569,11 +608,9 @@ fn main( let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(pixel_coord); let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, pixel_coord, &rng, local_index, group_id, enable_restir_debug); - let color = ro.radiance; + let color = compute_restir(pixel_coord, local_index, group_id, &rng, enable_restir_debug); if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; From f872720f7c74143054036e10f4fdcd59768f7556 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 24 Aug 2024 23:40:06 -0700 Subject: [PATCH 09/12] Refactor MIS according to GRIS paper --- blade-render/code/ray-trace.wgsl | 47 ++++++++++++-------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index d614a167..054f6fc3 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -16,10 +16,7 @@ const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; -// Base MIS for canonical samples. The constant isolates a critical difference between -// Bitterli's pseudocode (where it's 1) and NVidia's RTXDI implementation (where it's 0). -// With Bitterli's 1 we have MIS not respecting the prior history enough. -const BASE_CANONICAL_MIS: f32 = 0.05; +const DEFENSIVE_MIS: bool = false; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; @@ -338,19 +335,6 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return brdf; } -struct HeuristicFactors { - weight: f32, - //history: f32, -} - -fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors { - var hf: HeuristicFactors; - let balance_denom = h0 * w0 + h1 * w1; - hf.weight = select(h0 * w0 / balance_denom, 0.0, balance_denom <= 0.0); - //hf.history = select(pow(clamp(w1 / w0, 0.0, 1.0), 8.0), 1.0, w0 <= 0.0); - return hf; -} - struct ResampleBase { surface: Surface, canonical: LiveReservoir, @@ -362,6 +346,8 @@ struct ResampleResult { mis_canonical: f32, } +const canonical_count: f32 = 1.0; + fn resample( dst: ptr, color_and_weight: ptr>, base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, @@ -377,10 +363,10 @@ fn resample( { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); - let mis_sub_canonical = balance_heuristic( - t_canonical_at_neighbor.score, canonical.selected_target_score, - neighbor_history * base.accepted_count, canonical.history); - rr.mis_canonical = 1.0 - mis_sub_canonical.weight; + let nom = canonical.selected_target_score * canonical.history; + let denom = canonical_count * nom + t_canonical_at_neighbor.score * neighbor_history * base.accepted_count; + let kf = 1.0 / select(base.accepted_count, canonical_count + base.accepted_count, DEFENSIVE_MIS); + rr.mis_canonical = kf * nom / max(0.01, denom); } // Notes about t_neighbor_at_neighbor: @@ -390,17 +376,16 @@ fn resample( //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); let t_neighbor_at_canonical = estimate_target_score_with_occlusion( base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let mis_neighbor = balance_heuristic( - neighbor.target_score, t_neighbor_at_canonical.score, - neighbor_history * base.accepted_count, canonical.history); + let nom = t_neighbor_at_canonical.score * canonical.history; + let denom = canonical_count * neighbor.target_score * neighbor_history + base.accepted_count * nom; + let kf = select(1.0, base.accepted_count / (canonical_count + base.accepted_count), DEFENSIVE_MIS); + let mis_neighbor = kf * nom / max(0.01, denom); src.history = neighbor_history; src.selected_light_index = neighbor.light_index; src.selected_uv = neighbor.light_uv; src.selected_target_score = t_neighbor_at_canonical.score; - src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; - //Note: should be needed according to the paper - // src.history *= min(mis_neighbor.history, mis_sub_canonical.history); + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor; src.radiance = t_neighbor_at_canonical.color; } else { src = unpack_reservoir(neighbor, max_history); @@ -437,7 +422,7 @@ fn finalize_resampling( merge_reservoir(reservoir, canonical, random_gen(rng)); if (base.accepted_count > 0.0) { - let effective_history = select((*reservoir).history, BASE_CANONICAL_MIS + base.accepted_count, PAIRWISE_MIS); + let effective_history = select((*reservoir).history, base.accepted_count, PAIRWISE_MIS); ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); } else { ro.reservoir = pack_reservoir(canonical); @@ -505,7 +490,8 @@ fn resample_temporal( let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, enable_debug); - let mis_canonical = BASE_CANONICAL_MIS + rr.mis_canonical; + let total_samples = 2.0; + let mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS) + rr.mis_canonical; return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } @@ -544,7 +530,8 @@ fn resample_spatial( let canonical = unpack_reservoir(canonical_stored, ~0u); var reservoir = LiveReservoir(); - var mis_canonical = BASE_CANONICAL_MIS; + let total_samples = 1.0 + f32(accepted_count); + var mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, f32(accepted_count)); From 5573b8eaa22891d44556374d0285fe7f64b45c45 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 25 Aug 2024 23:01:31 -0700 Subject: [PATCH 10/12] Fix groups, canonical sampling, refactor MIS --- blade-graphics/src/util.rs | 17 +-- blade-render/code/ray-trace.wgsl | 178 +++++++++++++++++++------------ blade-render/src/render/mod.rs | 24 ++++- 3 files changed, 142 insertions(+), 77 deletions(-) diff --git a/blade-graphics/src/util.rs b/blade-graphics/src/util.rs index 23e876e0..f0d2013e 100644 --- a/blade-graphics/src/util.rs +++ b/blade-graphics/src/util.rs @@ -94,14 +94,19 @@ impl super::TextureFormat { } } +impl super::Extent { + pub fn group_by(&self, size: [u32; 3]) -> [u32; 3] { + [ + (self.width + size[0] - 1) / size[0], + (self.height + size[1] - 1) / size[1], + (self.depth + size[2] - 1) / size[2], + ] + } +} + impl super::ComputePipeline { /// Return the dispatch group counts sufficient to cover the given extent. pub fn get_dispatch_for(&self, extent: super::Extent) -> [u32; 3] { - let wg_size = self.get_workgroup_size(); - [ - (extent.width + wg_size[0] - 1) / wg_size[0], - (extent.height + wg_size[1] - 1) / wg_size[1], - (extent.depth + wg_size[2] - 1) / wg_size[2], - ] + extent.group_by(self.get_workgroup_size()) } } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 054f6fc3..90cfdfef 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -16,7 +16,6 @@ const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; -const DEFENSIVE_MIS: bool = false; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; @@ -121,12 +120,12 @@ fn merge_reservoir(r: ptr, other: LiveReservoir, random return false; } } -fn unpack_reservoir(f: StoredReservoir, max_history: u32) -> LiveReservoir { +fn unpack_reservoir(f: StoredReservoir, max_history: u32, radiance: vec3) -> LiveReservoir { var r: LiveReservoir; r.selected_light_index = f.light_index; r.selected_uv = f.light_uv; r.selected_target_score = f.target_score; - r.radiance = vec3(0.0); // to be continued... + r.radiance = radiance; let history = min(f.confidence, f32(max_history)); r.weight_sum = f.contribution_weight * f.target_score * history; r.history = history; @@ -305,11 +304,11 @@ fn estimate_target_score_with_occlusion( if (check_ray_occluded(acs, position, direction, debug_len)) { return TargetScore(); - } else { - //Note: same as `evaluate_reflected_light` - let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; - return make_target_score(brdf * radiance); } + + //Note: same as `evaluate_reflected_light` + let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; + return make_target_score(brdf * radiance); } fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debug_len: f32) -> f32 { @@ -335,6 +334,30 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return brdf; } +fn produce_canonical( + surface: Surface, position: vec3, + rng: ptr, debug_len: f32, +) -> LiveReservoir { + var reservoir = LiveReservoir(); + for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { + var ls: LightSample; + if (parameters.environment_importance_sampling != 0u) { + ls = sample_light_from_environment(rng); + } else { + ls = sample_light_from_sphere(rng); + } + + let brdf = evaluate_sample(ls, surface, position, debug_len); + if (brdf > 0.0) { + let other = make_reservoir(ls, 0u, vec3(brdf)); + merge_reservoir(&reservoir, other, random_gen(rng)); + } else { + bump_reservoir(&reservoir, 1.0); + } + } + return reservoir; +} + struct ResampleBase { surface: Surface, canonical: LiveReservoir, @@ -344,29 +367,27 @@ struct ResampleBase { struct ResampleResult { selected: bool, mis_canonical: f32, + mis_sample: f32, } -const canonical_count: f32 = 1.0; - +// Resample following Algorithm 8 in section 9.1 of Bitterli thesis fn resample( dst: ptr, color_and_weight: ptr>, base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, - rng: ptr, enable_debug: bool, + rng: ptr, debug_len: f32, ) -> ResampleResult { var src: LiveReservoir; let neighbor = other.reservoir; var rr = ResampleResult(); if (PAIRWISE_MIS) { - let debug_len = select(0.0, other.surface.depth * 0.2, enable_debug); let canonical = base.canonical; let neighbor_history = min(neighbor.confidence, f32(max_history)); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); - let nom = canonical.selected_target_score * canonical.history; - let denom = canonical_count * nom + t_canonical_at_neighbor.score * neighbor_history * base.accepted_count; - let kf = 1.0 / select(base.accepted_count, canonical_count + base.accepted_count, DEFENSIVE_MIS); - rr.mis_canonical = kf * nom / max(0.01, denom); + let nom = canonical.selected_target_score * canonical.history / base.accepted_count; + let denom = t_canonical_at_neighbor.score * neighbor_history + nom; + rr.mis_canonical = select(0.0, nom / denom, denom > 0.0); } // Notes about t_neighbor_at_neighbor: @@ -376,10 +397,10 @@ fn resample( //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); let t_neighbor_at_canonical = estimate_target_score_with_occlusion( base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let nom = t_neighbor_at_canonical.score * canonical.history; - let denom = canonical_count * neighbor.target_score * neighbor_history + base.accepted_count * nom; - let kf = select(1.0, base.accepted_count / (canonical_count + base.accepted_count), DEFENSIVE_MIS); - let mis_neighbor = kf * nom / max(0.01, denom); + let nom = neighbor.target_score * neighbor_history; + let denom = nom + t_neighbor_at_canonical.score * canonical.history / base.accepted_count; + let mis_neighbor = select(0.0, nom / denom, denom > 0.0); + rr.mis_sample = mis_neighbor; src.history = neighbor_history; src.selected_light_index = neighbor.light_index; @@ -388,11 +409,14 @@ fn resample( src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor; src.radiance = t_neighbor_at_canonical.color; } else { - src = unpack_reservoir(neighbor, max_history); - src.radiance = evaluate_reflected_light(base.surface, src.selected_light_index, src.selected_uv); + rr.mis_canonical = 0.0; + rr.mis_sample = 1.0; + let radiance = evaluate_reflected_light(base.surface, neighbor.light_index, neighbor.light_uv); + src = unpack_reservoir(neighbor, max_history, radiance); } if (DECOUPLED_SHADING) { + //TODO: use `mis_neighbor`O *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } if (src.weight_sum <= 0.0) { @@ -409,29 +433,39 @@ struct ResampleOutput { color: vec3, } +fn revive_canonical(ro: ResampleOutput) -> LiveReservoir { + let radiance = select(vec3(0.0), ro.color / ro.reservoir.contribution_weight, ro.reservoir.contribution_weight > 0.0); + return unpack_reservoir(ro.reservoir, ~0u, radiance); +} + +fn finalize_canonical(reservoir: LiveReservoir) -> ResampleOutput { + var ro = ResampleOutput(); + ro.reservoir = pack_reservoir(reservoir); + ro.color = ro.reservoir.contribution_weight * reservoir.radiance; + return ro; +} + fn finalize_resampling( reservoir: ptr, color_and_weight: ptr>, base: ResampleBase, mis_canonical: f32, rng: ptr, ) -> ResampleOutput { var ro = ResampleOutput(); var canonical = base.canonical; - if (PAIRWISE_MIS && canonical.history > 0.0) { - //TODO: fix the case of `mis_canonical` being too low - canonical.weight_sum *= mis_canonical / canonical.history; - } + canonical.weight_sum *= mis_canonical / canonical.history; merge_reservoir(reservoir, canonical, random_gen(rng)); if (base.accepted_count > 0.0) { - let effective_history = select((*reservoir).history, base.accepted_count, PAIRWISE_MIS); + let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, PAIRWISE_MIS); ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); } else { ro.reservoir = pack_reservoir(canonical); } if (DECOUPLED_SHADING) { - //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let contribution_weight = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + if (canonical.selected_target_score > 0.0) { + let contribution_weight = canonical.weight_sum / canonical.selected_target_score; + *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + } ro.color = (*color_and_weight).xyz / max((*color_and_weight).w, 0.001); } else { ro.color = ro.reservoir.contribution_weight * (*reservoir).radiance; @@ -441,45 +475,29 @@ fn finalize_resampling( fn resample_temporal( surface: Surface, cur_pixel: vec2, position: vec3, - rng: ptr, enable_debug: bool + rng: ptr, debug_len: f32, ) -> ResampleOutput { + if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { + textureStore(out_debug, cur_pixel, vec4(0.0)); + } if (surface.depth == 0.0) { return ResampleOutput(); } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - - // build the canonical sample - var canonical = LiveReservoir(); - for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { - var ls: LightSample; - if (parameters.environment_importance_sampling != 0u) { - ls = sample_light_from_environment(rng); - } else { - ls = sample_light_from_sphere(rng); - } - - let brdf = evaluate_sample(ls, surface, position, debug_len); - if (brdf > 0.0) { - let other = make_reservoir(ls, 0u, vec3(brdf)); - merge_reservoir(&canonical, other, random_gen(rng)); - } else { - bump_reservoir(&canonical, 1.0); - } - } + let canonical = produce_canonical(surface, position, rng, debug_len); //TODO: find best match in a 2x2 grid let prev_pixel = vec2(get_prev_pixel(cur_pixel, position)); let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { - return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); + return finalize_canonical(canonical); } let prev_reservoir = prev_reservoirs[prev_reservoir_index]; let prev_surface = read_prev_surface(prev_pixel); // if the surfaces are too different, there is no trust in this sample if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { - return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); + return finalize_canonical(canonical); } var reservoir = LiveReservoir(); @@ -489,25 +507,36 @@ fn resample_temporal( let prev_dir = get_ray_direction(prev_camera, prev_pixel); let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); - let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, enable_debug); - let total_samples = 2.0; - let mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS) + rr.mis_canonical; + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, debug_len); + let mis_canonical = 1.0 + rr.mis_canonical; + if (debug.view_mode == DebugMode_TemporalMatch) { + textureStore(out_debug, cur_pixel, vec4(1.0)); + } + if (debug.view_mode == DebugMode_TemporalMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); + } + if (debug.view_mode == DebugMode_TemporalMisError) { + let total = mis_canonical + rr.mis_sample; + textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } fn resample_spatial( surface: Surface, cur_pixel: vec2, position: vec3, - group_id: vec3, canonical_stored: StoredReservoir, - rng: ptr, enable_debug: bool + group_id: vec3, canonical: LiveReservoir, + rng: ptr, debug_len: f32, ) -> ResampleOutput { if (surface.depth == 0.0) { + if (debug.view_mode == DebugMode_SpatialMatch || debug.view_mode == DebugMode_SpatialMisCanonical || debug.view_mode == DebugMode_SpatialMisError) { + textureStore(out_debug, cur_pixel, vec4(0.0)); + } let dir = normalize(position - camera.position); var ro = ResampleOutput(); ro.color = evaluate_environment(dir); return ro; } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); // gather the list of neighbors (within the workgroup) to resample. var accepted_count = 0u; @@ -528,20 +557,31 @@ fn resample_spatial( } } - let canonical = unpack_reservoir(canonical_stored, ~0u); var reservoir = LiveReservoir(); - let total_samples = 1.0 + f32(accepted_count); - var mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + var mis_canonical = 1.0; + var mis_sample_sum = 0.0; // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { let other = pixel_cache[accepted_local_indices[lid]]; - let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, enable_debug); + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, debug_len); mis_canonical += rr.mis_canonical; + mis_sample_sum += rr.mis_sample; } + if (debug.view_mode == DebugMode_SpatialMatch) { + let value = f32(accepted_count) / max(1.0, f32(parameters.spatial_taps)); + textureStore(out_debug, cur_pixel, vec4(value)); + } + if (debug.view_mode == DebugMode_SpatialMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); + } + if (debug.view_mode == DebugMode_SpatialMisError) { + let total = mis_canonical + mis_sample_sum; + textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } @@ -553,22 +593,23 @@ fn compute_restir( if (debug.view_mode == DebugMode_Depth) { textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - - let position = camera.position + surface.depth * ray_dir; if (debug.view_mode == DebugMode_Normal) { let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); textureStore(out_debug, pixel, vec4(normal, 0.0)); } + let debug_len = select(0.0, surface.depth * 0.2, enable_debug); + let ray_dir = get_ray_direction(camera, pixel); + let pixel_index = get_reservoir_index(pixel, camera); + let position = camera.position + surface.depth * ray_dir; - let temporal = resample_temporal(surface, pixel, position, rng, enable_debug); + let temporal = resample_temporal(surface, pixel, position, rng, debug_len); pixel_cache[local_index] = PixelCache(surface, temporal.reservoir, position); // sync with the workgroup to ensure all reservoirs are available. workgroupBarrier(); - let spatial = resample_spatial(surface, pixel, position, group_id, temporal.reservoir, rng, enable_debug); + let temporal_live = revive_canonical(temporal); + let spatial = resample_spatial(surface, pixel, position, group_id, temporal_live, rng, debug_len); reservoirs[pixel_index] = spatial.reservoir; return spatial.color; } @@ -598,6 +639,7 @@ fn main( let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; let color = compute_restir(pixel_coord, local_index, group_id, &rng, enable_restir_debug); + if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 1eec948f..14d8ad93 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -51,7 +51,13 @@ pub enum DebugMode { Normal = 2, Motion = 3, HitConsistency = 4, - Variance = 5, + TemporalMatch = 5, + TemporalMisCanonical = 6, + TemporalMisError = 7, + SpatialMatch = 8, + SpatialMisCanonical = 9, + SpatialMisError = 10, + Variance = 100, } impl Default for DebugMode { @@ -1162,8 +1168,20 @@ impl Renderer { let r = self.frame_index as u32 ^ 0x5A; [r % limit + 1, (r / limit) % limit + 1] }; - - let groups = self.main_pipeline.get_dispatch_for(self.surface_size); + let groups = { + let wg_size = self.main_pipeline.get_workgroup_size(); + let cluster_size = [ + wg_size[0] * grid_scale[0], + wg_size[1] * grid_scale[1], + wg_size[2], + ]; + let clusters = self.surface_size.group_by(cluster_size); + [ + clusters[0] * grid_scale[0], + clusters[1] * grid_scale[1], + clusters[2], + ] + }; let mut pc = pass.with(&self.main_pipeline); pc.bind( 0, From 7e37196c80f61462fecca587bb2030e1606729b5 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 2 Sep 2024 13:57:38 -0700 Subject: [PATCH 11/12] Remove extra reservoirs buffer --- blade-render/code/ray-trace.wgsl | 3 +-- blade-render/src/render/mod.rs | 35 ++++++++++++-------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 90cfdfef..b9d40617 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -57,7 +57,6 @@ struct StoredReservoir { confidence: f32, } var reservoirs: array; -var prev_reservoirs: array; struct PixelCache { surface: Surface, @@ -493,7 +492,7 @@ fn resample_temporal( return finalize_canonical(canonical); } - let prev_reservoir = prev_reservoirs[prev_reservoir_index]; + let prev_reservoir = reservoirs[prev_reservoir_index]; let prev_surface = read_prev_surface(prev_pixel); // if the surfaces are too different, there is no trust in this sample if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 14d8ad93..74e3dd9c 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -209,7 +209,7 @@ impl RenderTarget { } struct RestirTargets { - reservoir_buf: [blade_graphics::Buffer; 2], + reservoir_buf: blade_graphics::Buffer, debug: RenderTarget<1>, depth: RenderTarget<2>, basis: RenderTarget<2>, @@ -228,14 +228,11 @@ impl RestirTargets { gpu: &blade_graphics::Context, ) -> Self { let total_reservoirs = size.width as usize * size.height as usize; - let mut reservoir_buf = [blade_graphics::Buffer::default(); 2]; - for (i, rb) in reservoir_buf.iter_mut().enumerate() { - *rb = gpu.create_buffer(blade_graphics::BufferDesc { - name: &format!("reservoirs{i}"), - size: reservoir_size as u64 * total_reservoirs as u64, - memory: blade_graphics::Memory::Device, - }); - } + let reservoir_buf = gpu.create_buffer(blade_graphics::BufferDesc { + name: "reservoirs", + size: reservoir_size as u64 * total_reservoirs as u64, + memory: blade_graphics::Memory::Device, + }); Self { reservoir_buf, @@ -287,9 +284,7 @@ impl RestirTargets { } fn destroy(&self, gpu: &blade_graphics::Context) { - for rb in self.reservoir_buf.iter() { - gpu.destroy_buffer(*rb); - } + gpu.destroy_buffer(self.reservoir_buf); self.debug.destroy(gpu); self.depth.destroy(gpu); self.basis.destroy(gpu); @@ -420,7 +415,6 @@ struct MainData { t_motion: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, - prev_reservoirs: blade_graphics::BufferPiece, out_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -1106,13 +1100,11 @@ impl Renderer { self.debug.reset_lines(&mut transfer); } let total_reservoirs = self.surface_size.width as u64 * self.surface_size.height as u64; - for reservoir_buf in self.targets.reservoir_buf.iter() { - transfer.fill_buffer( - reservoir_buf.at(0), - total_reservoirs * self.reservoir_size as u64, - 0, - ); - } + transfer.fill_buffer( + self.targets.reservoir_buf.at(0), + total_reservoirs * self.reservoir_size as u64, + 0, + ); } if !config.frozen { @@ -1224,8 +1216,7 @@ impl Renderer { t_prev_flat_normal: self.targets.flat_normal.views[prev], t_motion: self.targets.motion.views[0], debug_buf: self.debug.buffer_resource(), - reservoirs: self.targets.reservoir_buf[cur].into(), - prev_reservoirs: self.targets.reservoir_buf[prev].into(), + reservoirs: self.targets.reservoir_buf.into(), out_diffuse: self.targets.light_diffuse.views[cur], out_debug: self.targets.debug.views[0], }, From 53b94118f45f4f8f0a8d7b0fe2691376eea39fa1 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 2 Sep 2024 21:54:28 -0700 Subject: [PATCH 12/12] Merge G-Buffer pass into the Main --- blade-render/code/blur.wgsl | 2 +- blade-render/code/fill-gbuf.wgsl | 204 ---------------------------- blade-render/code/gbuf.inc.wgsl | 2 - blade-render/code/geometry.inc.wgsl | 182 +++++++++++++++++++++++++ blade-render/code/motion.inc.wgsl | 2 + blade-render/code/ray-trace.wgsl | 81 +++++------ blade-render/src/render/mod.rs | 110 ++++----------- 7 files changed, 245 insertions(+), 338 deletions(-) delete mode 100644 blade-render/code/fill-gbuf.wgsl delete mode 100644 blade-render/code/gbuf.inc.wgsl create mode 100644 blade-render/code/geometry.inc.wgsl create mode 100644 blade-render/code/motion.inc.wgsl diff --git a/blade-render/code/blur.wgsl b/blade-render/code/blur.wgsl index 3207ef60..3aec665b 100644 --- a/blade-render/code/blur.wgsl +++ b/blade-render/code/blur.wgsl @@ -1,5 +1,5 @@ #include "camera.inc.wgsl" -#include "gbuf.inc.wgsl" +#include "motion.inc.wgsl" #include "quaternion.inc.wgsl" #include "surface.inc.wgsl" diff --git a/blade-render/code/fill-gbuf.wgsl b/blade-render/code/fill-gbuf.wgsl deleted file mode 100644 index 346edf51..00000000 --- a/blade-render/code/fill-gbuf.wgsl +++ /dev/null @@ -1,204 +0,0 @@ -#include "quaternion.inc.wgsl" -#include "camera.inc.wgsl" -#include "debug.inc.wgsl" -#include "debug-param.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: use proper WGSL -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; - -// Has to match the host! -struct Vertex { - pos: vec3, - bitangent_sign: f32, - tex_coords: vec2, - normal: u32, - tangent: u32, -} -struct VertexBuffer { - data: array, -} -struct IndexBuffer { - data: array, -} -var vertex_buffers: binding_array; -var index_buffers: binding_array; -var textures: binding_array>; -var sampler_linear: sampler; -var sampler_nearest: sampler; - -struct HitEntry { - index_buf: u32, - vertex_buf: u32, - winding: f32, - // packed quaternion - geometry_to_world_rotation: u32, - geometry_to_object: mat4x3, - prev_object_to_world: mat4x3, - base_color_texture: u32, - // packed color factor - base_color_factor: u32, - normal_texture: u32, -} -var hit_entries: array; - -var camera: CameraParams; -var prev_camera: CameraParams; -var debug: DebugParams; -var acc_struct: acceleration_structure; - -var out_depth: texture_storage_2d; -var out_flat_normal: texture_storage_2d; -var out_basis: texture_storage_2d; -var out_albedo: texture_storage_2d; -var out_motion: texture_storage_2d; -var out_debug: texture_storage_2d; - -fn decode_normal(raw: u32) -> vec3 { - return unpack4x8snorm(raw).xyz; -} - -fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { - let nw = normalize(qrot(rotation, decode_normal(normal_raw))); - debug_line(pos, pos + debug_len * nw, color); -} - -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { - if (any(global_id.xy >= camera.target_size)) { - return; - } - - var rq: ray_query; - let ray_dir = get_ray_direction(camera, vec2(global_id.xy)); - rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); - rayQueryProceed(&rq); - let intersection = rayQueryGetCommittedIntersection(&rq); - - var depth = 0.0; - var basis = vec4(0.0); - var flat_normal = vec3(0.0); - var albedo = vec3(1.0); - var motion = vec2(0.0); - let enable_debug = all(global_id.xy == debug.mouse_pos); - - if (intersection.kind != RAY_QUERY_INTERSECTION_NONE) { - let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; - depth = intersection.t; - - var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); - if (entry.index_buf != ~0u) { - let iptr = &index_buffers[entry.index_buf].data; - indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); - } - - let vptr = &vertex_buffers[entry.vertex_buf].data; - let vertices = array( - (*vptr)[indices.x], - (*vptr)[indices.y], - (*vptr)[indices.z], - ); - - let positions_object = entry.geometry_to_object * mat3x4( - vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) - ); - let positions = intersection.object_to_world * mat3x4( - vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) - ); - flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); - - let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); - let position_object = vec4(positions_object * barycentrics, 1.0); - let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; - let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); - let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); - let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; - - let lod = 0.0; //TODO: this is actually complicated - - let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); - let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); - var normal_local: vec3; - if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { - normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map - } else { - let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; - normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); - } - var normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); - basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); - - let hit_position = camera.position + intersection.t * ray_dir; - if (enable_debug) { - debug_buf.entry.custom_index = intersection.instance_custom_index; - debug_buf.entry.depth = intersection.t; - debug_buf.entry.tex_coords = tex_coords; - debug_buf.entry.base_color_texture = entry.base_color_texture; - debug_buf.entry.normal_texture = entry.normal_texture; - debug_buf.entry.position = hit_position; - debug_buf.entry.flat_normal = flat_normal; - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { - let normal_len = 0.15 * intersection.t; - let side = 0.05 * intersection.t; - debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); - debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); - debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { - let debug_len = intersection.t * 0.2; - debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); - debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); - debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); - let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; - debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); - // note: dynamic indexing into positions isn't allowed by WGSL yet - debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - // draw tangent space - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); - } - - let base_color_factor = unpack4x8unorm(entry.base_color_factor); - if ((debug.texture_flags & DebugTextureFlags_ALBEDO) != 0u) { - albedo = base_color_factor.xyz; - } else { - let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); - albedo = (base_color_factor * base_color_sample).xyz; - } - - if (debug.view_mode == DebugMode_HitConsistency) { - let reprojected = get_projected_pixel(camera, hit_position); - let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; - let camera_projection_diff = vec2(global_id.xy) - vec2(reprojected); - let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); - textureStore(out_debug, global_id.xy, consistency); - } - - let prev_position = (entry.prev_object_to_world * position_object).xyz; - let prev_screen = get_projected_pixel_float(prev_camera, prev_position); - //TODO: consider just storing integers here? - //TODO: technically this "0.5" is just a waste compute on both packing and unpacking - motion = prev_screen - vec2(global_id.xy) - 0.5; - if (debug.view_mode == DebugMode_Motion) { - textureStore(out_debug, global_id.xy, vec4(motion * MOTION_SCALE + vec2(0.5), 0.0, 1.0)); - } - } else { - if (enable_debug) { - debug_buf.entry = DebugEntry(); - } - if (debug.view_mode != DebugMode_Final) { - textureStore(out_debug, global_id.xy, vec4(0.0)); - } - } - - // TODO: option to avoid writing data for the sky - textureStore(out_depth, global_id.xy, vec4(depth, 0.0, 0.0, 0.0)); - textureStore(out_basis, global_id.xy, basis); - textureStore(out_flat_normal, global_id.xy, vec4(flat_normal, 0.0)); - textureStore(out_albedo, global_id.xy, vec4(albedo, 0.0)); - textureStore(out_motion, global_id.xy, vec4(motion * MOTION_SCALE, 0.0, 0.0)); -} diff --git a/blade-render/code/gbuf.inc.wgsl b/blade-render/code/gbuf.inc.wgsl deleted file mode 100644 index ecb4642d..00000000 --- a/blade-render/code/gbuf.inc.wgsl +++ /dev/null @@ -1,2 +0,0 @@ -const MOTION_SCALE: f32 = 0.02; -const USE_MOTION_VECTORS: bool = true; \ No newline at end of file diff --git a/blade-render/code/geometry.inc.wgsl b/blade-render/code/geometry.inc.wgsl new file mode 100644 index 00000000..99b771b1 --- /dev/null +++ b/blade-render/code/geometry.inc.wgsl @@ -0,0 +1,182 @@ +//TODO: https://github.com/gfx-rs/wgpu/pull/5429 +const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; + +// Has to match the host! +struct Vertex { + pos: vec3, + bitangent_sign: f32, + tex_coords: vec2, + normal: u32, + tangent: u32, +} +struct VertexBuffer { + data: array, +} +struct IndexBuffer { + data: array, +} +var vertex_buffers: binding_array; +var index_buffers: binding_array; +var textures: binding_array>; + +struct HitEntry { + index_buf: u32, + vertex_buf: u32, + winding: f32, + // packed quaternion + geometry_to_world_rotation: u32, + geometry_to_object: mat4x3, + prev_object_to_world: mat4x3, + base_color_texture: u32, + // packed color factor + base_color_factor: u32, + normal_texture: u32, +} +var hit_entries: array; + +fn decode_normal(raw: u32) -> vec3 { + return unpack4x8snorm(raw).xyz; +} + +fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { + let nw = normalize(qrot(rotation, decode_normal(normal_raw))); + debug_line(pos, pos + debug_len * nw, color); +} + +struct RichSurface { + inner: Surface, + position: vec3, + albedo: vec3, + motion: vec2, +} + +fn fetch_geometry(pixel_coord: vec2, enable_debug: bool, is_primary: bool) -> RichSurface { + var rq: ray_query; + let ray_dir = get_ray_direction(camera, pixel_coord); + rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); + rayQueryProceed(&rq); + let intersection = rayQueryGetCommittedIntersection(&rq); + + var rs = RichSurface(); + rs.albedo = vec3(1.0); + + if (intersection.kind == RAY_QUERY_INTERSECTION_NONE) { + if (enable_debug) { + debug_buf.entry = DebugEntry(); + } + return rs; + } + + let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; + + var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); + if (entry.index_buf != ~0u) { + let iptr = &index_buffers[entry.index_buf].data; + indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); + } + + let vptr = &vertex_buffers[entry.vertex_buf].data; + let vertices = array( + (*vptr)[indices.x], + (*vptr)[indices.y], + (*vptr)[indices.z], + ); + + let positions_object = entry.geometry_to_object * mat3x4( + vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) + ); + let positions = intersection.object_to_world * mat3x4( + vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) + ); + let flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); + + let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); + let position_object = vec4(positions_object * barycentrics, 1.0); + let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; + let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); + let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); + let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; + + let lod = 0.0; //TODO: this is actually complicated + + let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); + let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); + var normal_local: vec3; + if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { + normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map + } else { + let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; + normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); + } + let normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); + let basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); + + let hit_position = camera.position + intersection.t * ray_dir; + if (enable_debug && is_primary) { + debug_buf.entry.custom_index = intersection.instance_custom_index; + debug_buf.entry.depth = intersection.t; + debug_buf.entry.tex_coords = tex_coords; + debug_buf.entry.base_color_texture = entry.base_color_texture; + debug_buf.entry.normal_texture = entry.normal_texture; + debug_buf.entry.position = hit_position; + debug_buf.entry.flat_normal = flat_normal; + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { + let normal_len = 0.15 * intersection.t; + let side = 0.05 * intersection.t; + debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); + debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); + debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { + let debug_len = intersection.t * 0.2; + debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); + debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); + debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); + let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; + debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); + // note: dynamic indexing into positions isn't allowed by WGSL yet + debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + // draw tangent space + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); + } + + rs.albedo = unpack4x8unorm(entry.base_color_factor).xyz; + if ((debug.texture_flags & DebugTextureFlags_ALBEDO) == 0u) { + let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); + rs.albedo *= base_color_sample.xyz; + } + + if (is_primary) { + if (debug.view_mode == DebugMode_Depth) { + textureStore(out_debug, pixel_coord, vec4(intersection.t / camera.depth)); + } + if (debug.view_mode == DebugMode_Normal) { + textureStore(out_debug, pixel_coord, vec4(normal, 0.0)); + } + if (debug.view_mode == DebugMode_HitConsistency) { + let reprojected = get_projected_pixel(camera, hit_position); + let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; + let camera_projection_diff = vec2(pixel_coord - reprojected); + let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); + textureStore(out_debug, pixel_coord, consistency); + } + } + + let prev_position = (entry.prev_object_to_world * position_object).xyz; + let prev_screen = get_projected_pixel_float(prev_camera, prev_position); + //TODO: consider just storing integers here? + //TODO: technically this "0.5" is just a waste compute on both packing and unpacking + rs.motion = prev_screen - vec2(pixel_coord) - 0.5; + rs.position = hit_position; + + // Write down the Surface + rs.inner.basis = basis; + rs.inner.flat_normal = flat_normal; + rs.inner.depth = intersection.t; + return rs; +} diff --git a/blade-render/code/motion.inc.wgsl b/blade-render/code/motion.inc.wgsl new file mode 100644 index 00000000..a9a9f48a --- /dev/null +++ b/blade-render/code/motion.inc.wgsl @@ -0,0 +1,2 @@ +const MOTION_SCALE: f32 = 0.02; +const USE_MOTION_VECTORS: bool = true; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index b9d40617..db350f3f 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -6,10 +6,8 @@ #include "debug-param.inc.wgsl" #include "camera.inc.wgsl" #include "surface.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: https://github.com/gfx-rs/wgpu/pull/5429 -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; +#include "geometry.inc.wgsl" +#include "motion.inc.wgsl" const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; @@ -23,7 +21,6 @@ const DECOUPLED_SHADING: bool = false; //TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; -const GROUP_VISUALIZE: bool = false; struct MainParams { frame_index: u32, @@ -106,6 +103,7 @@ fn make_reservoir(ls: LightSample, light_index: u32, brdf: vec3) -> LiveRes r.history = 1.0; return r; } + fn merge_reservoir(r: ptr, other: LiveReservoir, random: f32) -> bool { (*r).weight_sum += other.weight_sum; (*r).history += other.history; @@ -144,14 +142,9 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { return pack_reservoir_detail(r, r.history); } -var t_depth: texture_2d; var t_prev_depth: texture_2d; -var t_basis: texture_2d; -var t_prev_basis: texture_2d -; -var t_flat_normal: texture_2d; +var t_prev_basis: texture_2d; var t_prev_flat_normal: texture_2d; -var t_motion: texture_2d; var out_diffuse: texture_storage_2d; var out_debug: texture_storage_2d; @@ -206,14 +199,6 @@ fn sample_light_from_environment(rng: ptr) -> LightSample return ls; } -fn read_surface(pixel: vec2) -> Surface { - var surface: Surface; - surface.basis = normalize(textureLoad(t_basis, pixel, 0)); - surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_depth, pixel, 0).x; - return surface; -} - fn read_prev_surface(pixel: vec2) -> Surface { var surface: Surface; surface.basis = normalize(textureLoad(t_prev_basis, pixel, 0)); @@ -268,9 +253,8 @@ fn evaluate_reflected_light(surface: Surface, light_index: u32, light_uv: vec2, pos_world: vec3) -> vec2 { +fn get_prev_pixel(pixel: vec2, pos_world: vec3, motion: vec2) -> vec2 { if (USE_MOTION_VECTORS && parameters.use_motion_vectors != 0u) { - let motion = textureLoad(t_motion, pixel, 0).xy / MOTION_SCALE; return vec2(pixel) + 0.5 + motion; } else { return get_projected_pixel_float(prev_camera, pos_world); @@ -473,7 +457,7 @@ fn finalize_resampling( } fn resample_temporal( - surface: Surface, cur_pixel: vec2, position: vec3, + surface: Surface, motion: vec2, cur_pixel: vec2, position: vec3, rng: ptr, debug_len: f32, ) -> ResampleOutput { if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { @@ -485,7 +469,7 @@ fn resample_temporal( let canonical = produce_canonical(surface, position, rng, debug_len); //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(cur_pixel, position)); + let prev_pixel = vec2(get_prev_pixel(cur_pixel, position, motion)); let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { @@ -585,64 +569,71 @@ fn resample_spatial( } fn compute_restir( + rs: RichSurface, pixel: vec2, local_index: u32, group_id: vec3, rng: ptr, enable_debug: bool, ) -> vec3 { - let surface = read_surface(pixel); - if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); - } - if (debug.view_mode == DebugMode_Normal) { - let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); - textureStore(out_debug, pixel, vec4(normal, 0.0)); - } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - let position = camera.position + surface.depth * ray_dir; + let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); - let temporal = resample_temporal(surface, pixel, position, rng, debug_len); - pixel_cache[local_index] = PixelCache(surface, temporal.reservoir, position); + let temporal = resample_temporal(rs.inner, rs.motion, pixel, rs.position, rng, debug_len); + pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); // sync with the workgroup to ensure all reservoirs are available. workgroupBarrier(); let temporal_live = revive_canonical(temporal); - let spatial = resample_spatial(surface, pixel, position, group_id, temporal_live, rng, debug_len); + let spatial = resample_spatial(rs.inner, pixel, rs.position, group_id, temporal_live, rng, debug_len); + + let pixel_index = get_reservoir_index(pixel, camera); reservoirs[pixel_index] = spatial.reservoir; return spatial.color; } +var out_depth: texture_storage_2d; +var out_basis: texture_storage_2d; +var out_flat_normal: texture_storage_2d; +var out_albedo: texture_storage_2d; +var out_motion: texture_storage_2d; + @compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) fn main( @builtin(workgroup_id) group_id: vec3, @builtin(local_invocation_index) local_index: u32, ) { - pixel_cache[local_index].reservoir.confidence = 0.0; + pixel_cache[local_index] = PixelCache(); let pixel_coord = thread_index_to_coord(local_index, group_id); if (any(vec2(pixel_coord) >= camera.target_size)) { return; } - if (GROUP_VISUALIZE) - { + + if (debug.view_mode == DebugMode_Grouping) { var rng = random_init(group_id.y * 1000u + group_id.x, 0u); let h = random_gen(&rng) * 360.0; let color = hsv_to_rgb(h, 0.5, 1.0); - textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); + textureStore(out_debug, pixel_coord, vec4(color, 1.0)); return; } + let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); + let rs = fetch_geometry(pixel_coord, true, enable_debug); + + // TODO: option to avoid writing data for the sky + textureStore(out_depth, pixel_coord, vec4(rs.inner.depth, 0.0, 0.0, 0.0)); + textureStore(out_basis, pixel_coord, rs.inner.basis); + textureStore(out_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); + textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); + textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); + let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let color = compute_restir(pixel_coord, local_index, group_id, &rng, enable_restir_debug); + let color = compute_restir(rs, pixel_coord, local_index, group_id, &rng, enable_restir_debug); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; debug_buf.variance.count += 1u; } - textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); } diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 74e3dd9c..e0e724c5 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -51,12 +51,13 @@ pub enum DebugMode { Normal = 2, Motion = 3, HitConsistency = 4, - TemporalMatch = 5, - TemporalMisCanonical = 6, - TemporalMisError = 7, - SpatialMatch = 8, - SpatialMisCanonical = 9, - SpatialMisError = 10, + Grouping = 5, + TemporalMatch = 10, + TemporalMisCanonical = 11, + TemporalMisError = 12, + SpatialMatch = 13, + SpatialMisCanonical = 14, + SpatialMisError = 15, Variance = 100, } @@ -313,7 +314,6 @@ pub struct Renderer { shaders: Shaders, targets: RestirTargets, post_proc_input_index: usize, - fill_pipeline: blade_graphics::ComputePipeline, main_pipeline: blade_graphics::ComputePipeline, post_proc_pipeline: blade_graphics::RenderPipeline, blur: Blur, @@ -375,46 +375,31 @@ struct MainParams { } #[derive(blade_macros::ShaderData)] -struct FillData<'a> { +struct MainData<'a> { camera: CameraParams, prev_camera: CameraParams, debug: DebugParams, + parameters: MainParams, acc_struct: blade_graphics::AccelerationStructure, + prev_acc_struct: blade_graphics::AccelerationStructure, hit_entries: blade_graphics::BufferPiece, index_buffers: &'a blade_graphics::BufferArray, vertex_buffers: &'a blade_graphics::BufferArray, textures: &'a blade_graphics::TextureArray, sampler_linear: blade_graphics::Sampler, - debug_buf: blade_graphics::BufferPiece, - out_depth: blade_graphics::TextureView, - out_basis: blade_graphics::TextureView, - out_flat_normal: blade_graphics::TextureView, - out_albedo: blade_graphics::TextureView, - out_motion: blade_graphics::TextureView, - out_debug: blade_graphics::TextureView, -} - -#[derive(blade_macros::ShaderData)] -struct MainData { - camera: CameraParams, - prev_camera: CameraParams, - debug: DebugParams, - parameters: MainParams, - acc_struct: blade_graphics::AccelerationStructure, - prev_acc_struct: blade_graphics::AccelerationStructure, - sampler_linear: blade_graphics::Sampler, sampler_nearest: blade_graphics::Sampler, env_map: blade_graphics::TextureView, env_weights: blade_graphics::TextureView, - t_depth: blade_graphics::TextureView, t_prev_depth: blade_graphics::TextureView, - t_basis: blade_graphics::TextureView, t_prev_basis: blade_graphics::TextureView, - t_flat_normal: blade_graphics::TextureView, t_prev_flat_normal: blade_graphics::TextureView, - t_motion: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, + out_depth: blade_graphics::TextureView, + out_basis: blade_graphics::TextureView, + out_flat_normal: blade_graphics::TextureView, + out_albedo: blade_graphics::TextureView, + out_motion: blade_graphics::TextureView, out_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -492,7 +477,6 @@ struct HitEntry { #[derive(Clone, PartialEq)] pub struct Shaders { env_prepare: blade_asset::Handle, - fill_gbuf: blade_asset::Handle, ray_trace: blade_asset::Handle, blur: blade_asset::Handle, post_proc: blade_asset::Handle, @@ -505,7 +489,6 @@ impl Shaders { let mut ctx = asset_hub.open_context(path, "shader finish"); let shaders = Self { env_prepare: ctx.load_shader("env-prepare.wgsl"), - fill_gbuf: ctx.load_shader("fill-gbuf.wgsl"), ray_trace: ctx.load_shader("ray-trace.wgsl"), blur: ctx.load_shader("blur.wgsl"), post_proc: ctx.load_shader("post-proc.wgsl"), @@ -517,7 +500,6 @@ impl Shaders { } struct ShaderPipelines { - fill: blade_graphics::ComputePipeline, main: blade_graphics::ComputePipeline, temporal_accum: blade_graphics::ComputePipeline, atrous: blade_graphics::ComputePipeline, @@ -527,19 +509,6 @@ struct ShaderPipelines { } impl ShaderPipelines { - fn create_gbuf_fill( - shader: &blade_graphics::Shader, - gpu: &blade_graphics::Context, - ) -> blade_graphics::ComputePipeline { - shader.check_struct_size::(); - shader.check_struct_size::(); - let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "fill-gbuf", - data_layouts: &[&layout], - compute: shader.at("main"), - }) - } fn create_ray_trace( shader: &blade_graphics::Shader, gpu: &blade_graphics::Context, @@ -617,7 +586,6 @@ impl ShaderPipelines { let sh_main = shader_man[shaders.ray_trace].raw.as_ref().unwrap(); let sh_blur = shader_man[shaders.blur].raw.as_ref().unwrap(); Ok(Self { - fill: Self::create_gbuf_fill(shader_man[shaders.fill_gbuf].raw.as_ref().unwrap(), gpu), main: Self::create_ray_trace(sh_main, gpu), temporal_accum: Self::create_temporal_accum(sh_blur, gpu), atrous: Self::create_atrous(sh_blur, gpu), @@ -708,7 +676,6 @@ impl Renderer { shaders, targets, post_proc_input_index: 0, - fill_pipeline: sp.fill, main_pipeline: sp.main, post_proc_pipeline: sp.post_proc, blur: Blur { @@ -755,7 +722,6 @@ impl Renderer { // pipelines gpu.destroy_compute_pipeline(&mut self.blur.temporal_accum_pipeline); gpu.destroy_compute_pipeline(&mut self.blur.atrous_pipeline); - gpu.destroy_compute_pipeline(&mut self.fill_pipeline); gpu.destroy_compute_pipeline(&mut self.main_pipeline); gpu.destroy_render_pipeline(&mut self.post_proc_pipeline); } @@ -770,7 +736,6 @@ impl Renderer { let mut tasks = Vec::new(); let old = self.shaders.clone(); - tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.fill_gbuf)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.ray_trace)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.blur)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.post_proc)); @@ -787,11 +752,6 @@ impl Renderer { let _ = task.join(); } - if self.shaders.fill_gbuf != old.fill_gbuf { - if let Ok(ref shader) = asset_hub.shaders[self.shaders.fill_gbuf].raw { - self.fill_pipeline = ShaderPipelines::create_gbuf_fill(shader, gpu); - } - } if self.shaders.ray_trace != old.ray_trace { if let Ok(ref shader) = asset_hub.shaders[self.shaders.ray_trace].raw { assert_eq!( @@ -1127,33 +1087,6 @@ impl Renderer { let debug = self.make_debug_params(&debug_config); let (cur, prev) = self.work_indices(); - if let mut pass = command_encoder.compute() { - let mut pc = pass.with(&self.fill_pipeline); - let groups = self.fill_pipeline.get_dispatch_for(self.surface_size); - pc.bind( - 0, - &FillData { - camera: self.targets.camera_params[cur], - prev_camera: self.targets.camera_params[prev], - debug, - acc_struct: self.acceleration_structure, - hit_entries: self.hit_buffer.into(), - index_buffers: &self.index_buffers, - vertex_buffers: &self.vertex_buffers, - textures: &self.textures, - sampler_linear: self.samplers.linear, - debug_buf: self.debug.buffer_resource(), - out_depth: self.targets.depth.views[cur], - out_basis: self.targets.basis.views[cur], - out_flat_normal: self.targets.flat_normal.views[cur], - out_albedo: self.targets.albedo.views[0], - out_motion: self.targets.motion.views[0], - out_debug: self.targets.debug.views[0], - }, - ); - pc.dispatch(groups); - } - if let mut pass = command_encoder.compute() { let grid_scale = { let limit = ray_config.group_mixer; @@ -1204,19 +1137,24 @@ impl Renderer { } else { self.prev_acceleration_structure }, + hit_entries: self.hit_buffer.into(), + index_buffers: &self.index_buffers, + vertex_buffers: &self.vertex_buffers, + textures: &self.textures, sampler_linear: self.samplers.linear, sampler_nearest: self.samplers.nearest, env_map: self.env_map.main_view, env_weights: self.env_map.weight_view, - t_depth: self.targets.depth.views[cur], t_prev_depth: self.targets.depth.views[prev], - t_basis: self.targets.basis.views[cur], t_prev_basis: self.targets.basis.views[prev], - t_flat_normal: self.targets.flat_normal.views[cur], t_prev_flat_normal: self.targets.flat_normal.views[prev], - t_motion: self.targets.motion.views[0], debug_buf: self.debug.buffer_resource(), reservoirs: self.targets.reservoir_buf.into(), + out_depth: self.targets.depth.views[cur], + out_basis: self.targets.basis.views[cur], + out_flat_normal: self.targets.flat_normal.views[cur], + out_albedo: self.targets.albedo.views[0], + out_motion: self.targets.motion.views[0], out_diffuse: self.targets.light_diffuse.views[cur], out_debug: self.targets.debug.views[0], },