kvark · kvark · Sep 20, 2024 · Sep 19, 2024 · Sep 19, 2024 · Sep 7, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -21,7 +21,10 @@ glam = { version = "0.27", features = ["mint"] }
 gltf = { version = "1.1", default-features = false }
 log = "0.4"
 mint = "0.5"
-naga = { version = "22", features = ["wgsl-in"] }
+#TODO: switch to crates once https://github.com/gfx-rs/wgpu/pull/6256 is published
+naga = { git = "https://github.com/gfx-rs/wgpu", rev = "dfc384a7fd4ab7250a75d59c6f831d9ffb220f7e", features = [
+    "wgsl-in",
+] }
 profiling = "1"
 slab = "0.4"
 strum = { version = "0.25", features = ["derive"] }

diff --git a/blade-graphics/src/vulkan/pipeline.rs b/blade-graphics/src/vulkan/pipeline.rs
@@ -96,6 +96,8 @@ impl super::Context {
             naga_options_debug.debug_info = Some(naga::back::spv::DebugInfo {
                 source_code: &sf.shader.source,
                 file_name: &file_path,
+                //TODO: switch to WGSL once NSight Graphics recognizes it
+                language: naga::back::spv::SourceLanguage::GLSL,
             });
             &naga_options_debug
         } else {

diff --git a/blade-helpers/src/camera.rs b/blade-helpers/src/camera.rs
@@ -1,5 +1,7 @@
 use super::ExposeHud;
 
+const MAX_FLY_SPEED: f32 = 1000000.0;
+
 pub struct ControlledCamera {
     pub inner: blade_render::Camera,
     pub fly_speed: f32,
@@ -86,6 +88,14 @@ impl ControlledCamera {
 
         true
     }
+
+    /// Adjusts `fly_speed` multiplicatively in response to a scroll event.
+    ///
+    /// Each scrolled line multiplies the speed by `e` (a negative amount divides
+    /// it instead); the result is kept within `1.0..=MAX_FLY_SPEED`.
+    pub fn on_wheel(&mut self, delta: winit::event::MouseScrollDelta) {
+        // `PixelDelta` is reported in pixels rather than lines. Feeding it straight
+        // into `exp()` would pin the speed to one of the clamp bounds after a single
+        // touchpad event, so convert it to "lines" first.
+        // NOTE(review): the conversion factor is a heuristic - tune if needed.
+        const PIXELS_PER_LINE: f32 = 50.0;
+        let lines = match delta {
+            winit::event::MouseScrollDelta::LineDelta(_, lines) => lines,
+            winit::event::MouseScrollDelta::PixelDelta(position) => {
+                position.y as f32 / PIXELS_PER_LINE
+            }
+        };
+        self.fly_speed = (self.fly_speed * lines.exp()).clamp(1.0, MAX_FLY_SPEED);
+    }
 }
 
 impl ExposeHud for ControlledCamera {
@@ -105,7 +115,7 @@ impl ExposeHud for ControlledCamera {
         });
         ui.add(egui::Slider::new(&mut self.inner.fov_y, 0.5f32..=2.0f32).text("FOV"));
         ui.add(
-            egui::Slider::new(&mut self.fly_speed, 1f32..=100000f32)
+            egui::Slider::new(&mut self.fly_speed, 1f32..=MAX_FLY_SPEED)
                 .text("Fly speed")
                 .logarithmic(true),
         );

diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs
@@ -31,6 +31,10 @@ impl ExposeHud for blade_render::RayConfig {
                 .text("T min")
                 .logarithmic(true),
         );
+        ui.checkbox(&mut self.pairwise_mis, "Pairwise MIS");
+        ui.add(
+            egui::widgets::Slider::new(&mut self.defensive_mis, 0.0..=1.0).text("Defensive MIS"),
+        );
     }
 }
 

diff --git a/blade-render/code/blur.wgsl → blade-render/code/a-trous.wgsl b/blade-render/code/blur.wgsl → blade-render/code/a-trous.wgsl
diff --git a/blade-render/code/camera.inc.wgsl b/blade-render/code/camera.inc.wgsl
@@ -6,11 +6,13 @@ struct CameraParams {
     target_size: vec2<u32>,
 }
 
+const VFLIP: vec2<f32> = vec2<f32>(1.0, -1.0);
+
 fn get_ray_direction(cp: CameraParams, pixel: vec2<i32>) -> vec3<f32> {
+    // Returns the normalized direction of the ray through the center of `pixel`,
+    // after rotating the camera-local direction by `cp.orientation`.
+    // The pixel center is expressed relative to the target center in units of half
+    // the target size, so it stays within (-1, 1) for pixels inside the target.
     let half_size = 0.5 * vec2<f32>(cp.target_size);
     let ndc = (vec2<f32>(pixel) + vec2<f32>(0.5) - half_size) / half_size;
     // Right-handed coordinate system with X=right, Y=up, and Z=towards the camera
+    // VFLIP negates the Y component; presumably because pixel rows grow downwards
+    // while Y points up in the camera's local space - confirm against the callers.
-    let local_dir = vec3<f32>(ndc * tan(0.5 * cp.fov), -1.0);
+    let local_dir = vec3<f32>(VFLIP * ndc * tan(0.5 * cp.fov), -1.0);
+    // NOTE(review): `qrot` is defined elsewhere; assumed to rotate a vector by a quaternion.
     return normalize(qrot(cp.orientation, local_dir));
 }
 
@@ -21,7 +23,7 @@ fn get_projected_pixel_float(cp: CameraParams, point: vec3<f32>) -> vec2<f32> {
     }
     let ndc = local_dir.xy / (-local_dir.z * tan(0.5 * cp.fov));
     let half_size = 0.5 * vec2<f32>(cp.target_size);
-    return (ndc + vec2<f32>(1.0)) * half_size;
+    return (VFLIP * ndc + vec2<f32>(1.0)) * half_size;
 }
 
 fn get_projected_pixel(cp: CameraParams, point: vec3<f32>) -> vec2<i32> {

diff --git a/blade-render/code/fill-gbuf.wgsl b/blade-render/code/fill-gbuf.wgsl
@@ -68,6 +68,9 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
     if (any(global_id.xy >= camera.target_size)) {
         return;
     }
+    if (WRITE_DEBUG_IMAGE && debug.view_mode != DebugMode_Final) {
+        textureStore(out_debug, global_id.xy, vec4<f32>(0.0));
+    }
 
     var rq: ray_query;
     let ray_dir = get_ray_direction(camera, vec2<i32>(global_id.xy));
@@ -170,7 +173,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
             albedo = (base_color_factor * base_color_sample).xyz;
         }
 
-        if (debug.view_mode == DebugMode_HitConsistency) {
+        if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_HitConsistency) {
             let reprojected = get_projected_pixel(camera, hit_position);
             let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position;
             let camera_projection_diff = vec2<f32>(global_id.xy) - vec2<f32>(reprojected);
@@ -183,16 +186,13 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
         //TODO: consider just storing integers here?
         //TODO: technically this "0.5" is just a waste compute on both packing and unpacking
         motion = prev_screen - vec2<f32>(global_id.xy) - 0.5;
-        if (debug.view_mode == DebugMode_Motion) {
+        if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Motion) {
             textureStore(out_debug, global_id.xy, vec4<f32>(motion * MOTION_SCALE + vec2<f32>(0.5), 0.0, 1.0));
         }
     } else {
         if (enable_debug) {
             debug_buf.entry = DebugEntry();
         }
-        if (debug.view_mode != DebugMode_Final) {
-            textureStore(out_debug, global_id.xy, vec4<f32>(0.0));
-        }
     }
 
     // TODO: option to avoid writing data for the sky

diff --git a/blade-render/code/gbuf.inc.wgsl b/blade-render/code/gbuf.inc.wgsl
@@ -1,2 +1,3 @@
 const MOTION_SCALE: f32 = 0.02;
-const USE_MOTION_VECTORS: bool = true;
+const USE_MOTION_VECTORS: bool = true;
+const WRITE_DEBUG_IMAGE: bool = false;
diff --git a/blade-render/code/post-proc.wgsl b/blade-render/code/post-proc.wgsl
@@ -21,16 +21,16 @@ struct VertexOutput {
 }
 
 @vertex
-fn blit_vs(@builtin(vertex_index) vi: u32) -> VertexOutput {
+fn postfx_vs(@builtin(vertex_index) vi: u32) -> VertexOutput {
+    // Vertex stage that derives everything from the vertex index, without vertex buffers.
+    // Bit 0 of `vi` picks x from {-1, 3} and bit 1 picks y from {-1, 3}; for indices
+    // 0, 1, 2 that is one triangle covering the whole [-1, 1] clip-space square.
+    // NOTE(review): assumes the draw call issues 3 vertices - confirm at the call site.
     var vo: VertexOutput;
     vo.clip_pos = vec4<f32>(f32(vi & 1u) * 4.0 - 1.0, f32(vi & 2u) * 2.0 - 1.0, 0.0, 1.0);
+    // Dimensions of mip level 0 of `light_diffuse`, forwarded to the fragment stage.
     vo.input_size = textureDimensions(light_diffuse, 0);
     return vo;
 }
 
 @fragment
-fn blit_fs(vo: VertexOutput) -> @location(0) vec4<f32> {
-    let tc = vec2<i32>(i32(vo.clip_pos.x), i32(vo.input_size.y) - i32(vo.clip_pos.y) - 1);
+fn postfx_fs(vo: VertexOutput) -> @location(0) vec4<f32> {
+    let tc = vec2<i32>(i32(vo.clip_pos.x), i32(vo.clip_pos.y));
     let illumunation = textureLoad(light_diffuse, tc, 0);
     if (debug_params.view_mode == DebugMode_Final) {
         let albedo = textureLoad(t_albedo, tc, 0).xyz;

diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl
@@ -12,16 +12,16 @@ const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u;
 
 const PI: f32 = 3.1415926;
 const MAX_RESERVOIRS: u32 = 2u;
-// See "9.1 pairwise mis for robust reservoir reuse"
-// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport"
-const PAIRWISE_MIS: bool = true;
-// Base MIS for canonical samples. The constant isolates a critical difference between
-// Bitterli's pseudocode (where it's 1) and NVidia's RTXDI implementation (where it's 0).
-// With Bitterli's 1 we have MIS not respecting the prior history enough.
-const BASE_CANONICAL_MIS: f32 = 0.05;
 // See "DECOUPLING SHADING AND REUSE" in
 // "Rearchitecting Spatiotemporal Resampling for Production"
-const DECOUPLED_SHADING: bool = false;
+const DECOUPLED_SHADING: bool = true;
+
+// We are considering a 2x2 grid, so this must be <= 4
+const FACTOR_TEMPORAL_CANDIDATES: u32 = 1u;
+// How many more candidates to consider than the taps we need
+const FACTOR_SPATIAL_CANDIDATES: u32 = 3u;
+// Has to be large enough to at least discard the 2x2 block
+const MIN_SPATIAL_REUSE_DISTANCE: i32 = 7;
 
 struct MainParams {
     frame_index: u32,
@@ -33,6 +33,8 @@ struct MainParams {
     spatial_tap_history: u32,
     spatial_radius: i32,
     t_start: f32,
+    use_pairwise_mis: u32,
+    defensive_mis: f32,
     use_motion_vectors: u32,
 };
 
@@ -320,27 +322,15 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3<f32>, debu
     return brdf;
 }
 
-struct HeuristicFactors {
-    weight: f32,
-    //history: f32,
-}
-
-fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors {
-    var hf: HeuristicFactors;
-    let balance_denom = h0 * w0 + h1 * w1;
-    hf.weight = select(h0 * w0 / balance_denom, 0.0, balance_denom <= 0.0);
-    //hf.history = select(pow(clamp(w1 / w0, 0.0, 1.0), 8.0), 1.0, w0 <= 0.0);
-    return hf;
+// Returns the share of `a` in the sum `a + b`, i.e. `a / (a + b)`.
+// Yields 0.0 whenever the sum is not greater than zero, which avoids
+// propagating the result of a division by zero.
+fn ratio(a: f32, b: f32) -> f32 {
+    return select(0.0, a / (a+b), a+b > 0.0);
 }
 
 struct RestirOutput {
     radiance: vec3<f32>,
 }
 
 fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomState>, enable_debug: bool) -> RestirOutput {
-    if (debug.view_mode == DebugMode_Depth) {
-        textureStore(out_debug, pixel, vec4<f32>(surface.depth / camera.depth));
-    }
     let ray_dir = get_ray_direction(camera, pixel);
     let pixel_index = get_reservoir_index(pixel, camera);
     if (surface.depth == 0.0) {
@@ -349,10 +339,13 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
         return RestirOutput(env);
     }
 
+    if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Depth) {
+        textureStore(out_debug, pixel, vec4<f32>(1.0 / surface.depth));
+    }
     let debug_len = select(0.0, surface.depth * 0.2, enable_debug);
     let position = camera.position + surface.depth * ray_dir;
     let normal = qrot(surface.basis, vec3<f32>(0.0, 0.0, 1.0));
-    if (debug.view_mode == DebugMode_Normal) {
+    if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Normal) {
         textureStore(out_debug, pixel, vec4<f32>(normal, 0.0));
     }
 
@@ -374,22 +367,36 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
         }
     }
 
-    //TODO: find best match in a 2x2 grid
-    let prev_pixel = vec2<i32>(get_prev_pixel(pixel, position));
+    let center_coord = get_prev_pixel(pixel, position);
+    let center_pixel = vec2<i32>(center_coord);
+    // Trick to start with closer pixels: we derive the "further"
+    // pixel in 2x2 grid by considering the sum.
+    let further_pixel = vec2<i32>(center_coord - 0.5) + vec2<i32>(center_coord + 0.5) - center_pixel;
 
     // First, gather the list of reservoirs to merge with
     var accepted_reservoir_indices = array<i32, MAX_RESERVOIRS>();
     var accepted_count = 0u;
     var temporal_index = ~0u;
-    for (var tap = 0u; tap <= parameters.spatial_taps; tap += 1u) {
-        var other_pixel = prev_pixel;
-        if (tap != 0u) {
-            let r0 = max(prev_pixel - vec2<i32>(parameters.spatial_radius), vec2<i32>(0));
-            let r1 = min(prev_pixel + vec2<i32>(parameters.spatial_radius + 1), vec2<i32>(prev_camera.target_size));
+    let num_temporal_candidates = parameters.temporal_tap * FACTOR_TEMPORAL_CANDIDATES;
+    let num_candidates = num_temporal_candidates + parameters.spatial_taps * FACTOR_SPATIAL_CANDIDATES;
+    let max_samples = min(MAX_RESERVOIRS, 1u + parameters.spatial_taps);
+
+    for (var tap = 0u; tap <= num_candidates && accepted_count < max_samples; tap += 1u) {
+        var other_pixel = center_pixel;
+        if (tap < num_temporal_candidates) {
+            if (temporal_index < tap) {
+                continue;
+            }
+            let mask = vec2<u32>(tap) & vec2<u32>(1u, 2u);
+            other_pixel = select(center_pixel, further_pixel, mask != vec2<u32>(0u));
+        } else {
+            let r0 = max(center_pixel - vec2<i32>(parameters.spatial_radius), vec2<i32>(0));
+            let r1 = min(center_pixel + vec2<i32>(parameters.spatial_radius + 1), vec2<i32>(prev_camera.target_size));
             other_pixel = vec2<i32>(mix(vec2<f32>(r0), vec2<f32>(r1), vec2<f32>(random_gen(rng), random_gen(rng))));
-        } else if (parameters.temporal_tap == 0u)
-        {
-            continue;
+            let diff = other_pixel - center_pixel;
+            if (dot(diff, diff) < MIN_SPATIAL_REUSE_DISTANCE) {
+                continue;
+            }
         }
 
         let other_index = get_reservoir_index(other_pixel, prev_camera);
@@ -407,27 +414,35 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
             continue;
         }
 
-        if (tap == 0u) {
+        if (tap < num_temporal_candidates) {
             temporal_index = accepted_count;
         }
         accepted_reservoir_indices[accepted_count] = other_index;
-        if (accepted_count < MAX_RESERVOIRS) {
-            accepted_count += 1u;
+        accepted_count += 1u;
+    }
+
+    if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SampleReuse) {
+        var color = vec4<f32>(0.0);
+        for (var i = 0u; i < min(3u, accepted_count); i += 1u) {
+            color[i] = 1.0;
         }
+        textureStore(out_debug, pixel, color);
     }
 
     // Next, evaluate the MIS of each of the samples versus the canonical one.
     var reservoir = LiveReservoir();
-    var shaded_color = vec3<f32>(0.0);
-    var mis_canonical = BASE_CANONICAL_MIS;
     var color_and_weight = vec4<f32>(0.0);
+    let mis_scale = 1.0 / (f32(accepted_count) + parameters.defensive_mis);
+    var mis_canonical = select(mis_scale * parameters.defensive_mis, 1.0, accepted_count == 0u || parameters.use_pairwise_mis == 0u);
+    let inv_count = 1.0 / f32(accepted_count);
+
     for (var rid = 0u; rid < accepted_count; rid += 1u) {
         let neighbor_index = accepted_reservoir_indices[rid];
         let neighbor = prev_reservoirs[neighbor_index];
 
         let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, rid == temporal_index);
         var other: LiveReservoir;
-        if (PAIRWISE_MIS) {
+        if (parameters.use_pairwise_mis != 0u) {
             let neighbor_pixel = get_pixel_from_reservoir_index(neighbor_index, prev_camera);
             let neighbor_history = min(neighbor.confidence, f32(max_history));
             {   // scoping this to hint the register allocation
@@ -437,30 +452,20 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
 
                 let t_canonical_at_neighbor = estimate_target_score_with_occlusion(
                     neighbor_surface, neighbor_position, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len);
-                let mis_sub_canonical = balance_heuristic(
-                    t_canonical_at_neighbor.score, canonical.selected_target_score,
-                    neighbor_history * f32(accepted_count), canonical.history);
-                mis_canonical += 1.0 - mis_sub_canonical.weight;
+                let r_canonical = ratio(canonical.history * canonical.selected_target_score * inv_count, neighbor_history * t_canonical_at_neighbor.score);
+                mis_canonical += mis_scale * r_canonical;
             }
 
-            // Notes about t_neighbor_at_neighbor:
-            // 1. we assume lights aren't moving. Technically we should check if the
-            //   target light has moved, and re-evaluate the occlusion.
-            // 2. we can use the cached target score, and there is no use of the target color
-            //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir);
             let t_neighbor_at_canonical = estimate_target_score_with_occlusion(
                 surface, position, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len);
-            let mis_neighbor = balance_heuristic(
-                neighbor.target_score, t_neighbor_at_canonical.score,
-                neighbor_history * f32(accepted_count), canonical.history);
+            let r_neighbor = ratio(neighbor_history * neighbor.target_score, canonical.history * t_neighbor_at_canonical.score * inv_count);
+            let mis_neighbor = mis_scale * r_neighbor;
 
             other.history = neighbor_history;
             other.selected_light_index = neighbor.light_index;
             other.selected_uv = neighbor.light_uv;
             other.selected_target_score = t_neighbor_at_canonical.score;
-            other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight;
-            //Note: should be needed according to the paper
-            // other.history *= min(mis_neighbor.history, mis_sub_canonical.history);
+            other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor;
             other.radiance = t_neighbor_at_canonical.color;
         } else {
             other = unpack_reservoir(neighbor, max_history);
@@ -478,17 +483,16 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
     }
 
     // Finally, merge in the canonical sample
-    if (PAIRWISE_MIS) {
+    if (parameters.use_pairwise_mis != 0) {
         canonical.weight_sum *= mis_canonical / canonical.history;
     }
     if (DECOUPLED_SHADING) {
-        //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS?
-        let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1);
+        let cw = canonical.weight_sum / max(canonical.selected_target_score, 0.1);
         color_and_weight += canonical.weight_sum * vec4<f32>(cw * canonical.radiance, 1.0);
     }
     merge_reservoir(&reservoir, canonical, random_gen(rng));
 
-    let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS);
+    let effective_history = select(reservoir.history, 1.0, parameters.use_pairwise_mis != 0);
     let stored = pack_reservoir_detail(reservoir, effective_history);
     reservoirs[pixel_index] = stored;
     var ro = RestirOutput();