Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incremental ReSTIR improvements #177

Merged
merged 8 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ glam = { version = "0.27", features = ["mint"] }
gltf = { version = "1.1", default-features = false }
log = "0.4"
mint = "0.5"
naga = { version = "22", features = ["wgsl-in"] }
#TODO: switch to crates once https://github.com/gfx-rs/wgpu/pull/6256 is published
naga = { git = "https://github.com/gfx-rs/wgpu", rev = "dfc384a7fd4ab7250a75d59c6f831d9ffb220f7e", features = [
"wgsl-in",
] }
profiling = "1"
slab = "0.4"
strum = { version = "0.25", features = ["derive"] }
Expand Down
2 changes: 2 additions & 0 deletions blade-graphics/src/vulkan/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ impl super::Context {
naga_options_debug.debug_info = Some(naga::back::spv::DebugInfo {
source_code: &sf.shader.source,
file_name: &file_path,
//TODO: switch to WGSL once NSight Graphics recognizes it
language: naga::back::spv::SourceLanguage::GLSL,
});
&naga_options_debug
} else {
Expand Down
12 changes: 11 additions & 1 deletion blade-helpers/src/camera.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use super::ExposeHud;

const MAX_FLY_SPEED: f32 = 1000000.0;

pub struct ControlledCamera {
pub inner: blade_render::Camera,
pub fly_speed: f32,
Expand Down Expand Up @@ -86,6 +88,14 @@ impl ControlledCamera {

true
}

/// Adjusts the fly speed in response to a mouse wheel event.
///
/// The vertical scroll amount is used as an exponent: the current speed is
/// multiplied by `e^amount`, and the result is kept within
/// `1.0..=MAX_FLY_SPEED`.
pub fn on_wheel(&mut self, delta: winit::event::MouseScrollDelta) {
    use winit::event::MouseScrollDelta as Delta;

    // Only the vertical component of the scroll is considered.
    // NOTE(review): pixel deltas are used unscaled, exactly like line deltas —
    // confirm this is intended for high-resolution scroll devices.
    let exponent = match delta {
        Delta::LineDelta(_horizontal, vertical) => vertical,
        Delta::PixelDelta(offset) => offset.y as f32,
    };
    let scaled_speed = self.fly_speed * exponent.exp();
    self.fly_speed = scaled_speed.clamp(1.0, MAX_FLY_SPEED);
}
}

impl ExposeHud for ControlledCamera {
Expand All @@ -105,7 +115,7 @@ impl ExposeHud for ControlledCamera {
});
ui.add(egui::Slider::new(&mut self.inner.fov_y, 0.5f32..=2.0f32).text("FOV"));
ui.add(
egui::Slider::new(&mut self.fly_speed, 1f32..=100000f32)
egui::Slider::new(&mut self.fly_speed, 1f32..=MAX_FLY_SPEED)
.text("Fly speed")
.logarithmic(true),
);
Expand Down
4 changes: 4 additions & 0 deletions blade-helpers/src/hud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ impl ExposeHud for blade_render::RayConfig {
.text("T min")
.logarithmic(true),
);
ui.checkbox(&mut self.pairwise_mis, "Pairwise MIS");
ui.add(
egui::widgets::Slider::new(&mut self.defensive_mis, 0.0..=1.0).text("Defensive MIS"),
);
}
}

Expand Down
File renamed without changes.
6 changes: 4 additions & 2 deletions blade-render/code/camera.inc.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ struct CameraParams {
target_size: vec2<u32>,
}

const VFLIP: vec2<f32> = vec2<f32>(1.0, -1.0);

fn get_ray_direction(cp: CameraParams, pixel: vec2<i32>) -> vec3<f32> {
let half_size = 0.5 * vec2<f32>(cp.target_size);
let ndc = (vec2<f32>(pixel) + vec2<f32>(0.5) - half_size) / half_size;
// Right-handed coordinate system with X=right, Y=up, and Z=towards the camera
let local_dir = vec3<f32>(ndc * tan(0.5 * cp.fov), -1.0);
let local_dir = vec3<f32>(VFLIP * ndc * tan(0.5 * cp.fov), -1.0);
return normalize(qrot(cp.orientation, local_dir));
}

Expand All @@ -21,7 +23,7 @@ fn get_projected_pixel_float(cp: CameraParams, point: vec3<f32>) -> vec2<f32> {
}
let ndc = local_dir.xy / (-local_dir.z * tan(0.5 * cp.fov));
let half_size = 0.5 * vec2<f32>(cp.target_size);
return (ndc + vec2<f32>(1.0)) * half_size;
return (VFLIP * ndc + vec2<f32>(1.0)) * half_size;
}

fn get_projected_pixel(cp: CameraParams, point: vec3<f32>) -> vec2<i32> {
Expand Down
10 changes: 5 additions & 5 deletions blade-render/code/fill-gbuf.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
if (any(global_id.xy >= camera.target_size)) {
return;
}
if (WRITE_DEBUG_IMAGE && debug.view_mode != DebugMode_Final) {
textureStore(out_debug, global_id.xy, vec4<f32>(0.0));
}

var rq: ray_query;
let ray_dir = get_ray_direction(camera, vec2<i32>(global_id.xy));
Expand Down Expand Up @@ -170,7 +173,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
albedo = (base_color_factor * base_color_sample).xyz;
}

if (debug.view_mode == DebugMode_HitConsistency) {
if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_HitConsistency) {
let reprojected = get_projected_pixel(camera, hit_position);
let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position;
let camera_projection_diff = vec2<f32>(global_id.xy) - vec2<f32>(reprojected);
Expand All @@ -183,16 +186,13 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
//TODO: consider just storing integers here?
//TODO: technically this "0.5" is just a waste of compute on both packing and unpacking
motion = prev_screen - vec2<f32>(global_id.xy) - 0.5;
if (debug.view_mode == DebugMode_Motion) {
if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Motion) {
textureStore(out_debug, global_id.xy, vec4<f32>(motion * MOTION_SCALE + vec2<f32>(0.5), 0.0, 1.0));
}
} else {
if (enable_debug) {
debug_buf.entry = DebugEntry();
}
if (debug.view_mode != DebugMode_Final) {
textureStore(out_debug, global_id.xy, vec4<f32>(0.0));
}
}

// TODO: option to avoid writing data for the sky
Expand Down
3 changes: 2 additions & 1 deletion blade-render/code/gbuf.inc.wgsl
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
const MOTION_SCALE: f32 = 0.02;
const USE_MOTION_VECTORS: bool = true;
const USE_MOTION_VECTORS: bool = true;
const WRITE_DEBUG_IMAGE: bool = false;
6 changes: 3 additions & 3 deletions blade-render/code/post-proc.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ struct VertexOutput {
}

@vertex
fn blit_vs(@builtin(vertex_index) vi: u32) -> VertexOutput {
fn postfx_vs(@builtin(vertex_index) vi: u32) -> VertexOutput {
var vo: VertexOutput;
vo.clip_pos = vec4<f32>(f32(vi & 1u) * 4.0 - 1.0, f32(vi & 2u) * 2.0 - 1.0, 0.0, 1.0);
vo.input_size = textureDimensions(light_diffuse, 0);
return vo;
}

@fragment
fn blit_fs(vo: VertexOutput) -> @location(0) vec4<f32> {
let tc = vec2<i32>(i32(vo.clip_pos.x), i32(vo.input_size.y) - i32(vo.clip_pos.y) - 1);
fn postfx_fs(vo: VertexOutput) -> @location(0) vec4<f32> {
let tc = vec2<i32>(i32(vo.clip_pos.x), i32(vo.clip_pos.y));
let illumunation = textureLoad(light_diffuse, tc, 0);
if (debug_params.view_mode == DebugMode_Final) {
let albedo = textureLoad(t_albedo, tc, 0).xyz;
Expand Down
120 changes: 62 additions & 58 deletions blade-render/code/ray-trace.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u;

const PI: f32 = 3.1415926;
const MAX_RESERVOIRS: u32 = 2u;
// See "9.1 pairwise mis for robust reservoir reuse"
// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport"
const PAIRWISE_MIS: bool = true;
// Base MIS for canonical samples. The constant isolates a critical difference between
// Bitterli's pseudocode (where it's 1) and NVidia's RTXDI implementation (where it's 0).
// With Bitterli's 1 we have MIS not respecting the prior history enough.
const BASE_CANONICAL_MIS: f32 = 0.05;
// See "DECOUPLING SHADING AND REUSE" in
// "Rearchitecting Spatiotemporal Resampling for Production"
const DECOUPLED_SHADING: bool = false;
const DECOUPLED_SHADING: bool = true;

// We are considering 2x2 grid, so must be <= 4
const FACTOR_TEMPORAL_CANDIDATES: u32 = 1u;
// How many more candidates to consider than the taps we need
const FACTOR_SPATIAL_CANDIDATES: u32 = 3u;
// Has to be at least discarding the 2x2 block
const MIN_SPATIAL_REUSE_DISTANCE: i32 = 7;

struct MainParams {
frame_index: u32,
Expand All @@ -33,6 +33,8 @@ struct MainParams {
spatial_tap_history: u32,
spatial_radius: i32,
t_start: f32,
use_pairwise_mis: u32,
defensive_mis: f32,
use_motion_vectors: u32,
};

Expand Down Expand Up @@ -320,27 +322,15 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3<f32>, debu
return brdf;
}

struct HeuristicFactors {
weight: f32,
//history: f32,
}

fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors {
var hf: HeuristicFactors;
let balance_denom = h0 * w0 + h1 * w1;
hf.weight = select(h0 * w0 / balance_denom, 0.0, balance_denom <= 0.0);
//hf.history = select(pow(clamp(w1 / w0, 0.0, 1.0), 8.0), 1.0, w0 <= 0.0);
return hf;
// Returns the share of `a` in the sum `a + b`.
// Yields 0.0 when the sum is not strictly positive, so the division result is never used in that case.
fn ratio(a: f32, b: f32) -> f32 {
    let total = a + b;
    let share = a / total;
    return select(0.0, share, total > 0.0);
}

struct RestirOutput {
radiance: vec3<f32>,
}

fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomState>, enable_debug: bool) -> RestirOutput {
if (debug.view_mode == DebugMode_Depth) {
textureStore(out_debug, pixel, vec4<f32>(surface.depth / camera.depth));
}
let ray_dir = get_ray_direction(camera, pixel);
let pixel_index = get_reservoir_index(pixel, camera);
if (surface.depth == 0.0) {
Expand All @@ -349,10 +339,13 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
return RestirOutput(env);
}

if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Depth) {
textureStore(out_debug, pixel, vec4<f32>(1.0 / surface.depth));
}
let debug_len = select(0.0, surface.depth * 0.2, enable_debug);
let position = camera.position + surface.depth * ray_dir;
let normal = qrot(surface.basis, vec3<f32>(0.0, 0.0, 1.0));
if (debug.view_mode == DebugMode_Normal) {
if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Normal) {
textureStore(out_debug, pixel, vec4<f32>(normal, 0.0));
}

Expand All @@ -374,22 +367,36 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
}
}

//TODO: find best match in a 2x2 grid
let prev_pixel = vec2<i32>(get_prev_pixel(pixel, position));
let center_coord = get_prev_pixel(pixel, position);
let center_pixel = vec2<i32>(center_coord);
// Trick to start with closer pixels: we derive the "further"
// pixel in 2x2 grid by considering the sum.
let further_pixel = vec2<i32>(center_coord - 0.5) + vec2<i32>(center_coord + 0.5) - center_pixel;

// First, gather the list of reservoirs to merge with
var accepted_reservoir_indices = array<i32, MAX_RESERVOIRS>();
var accepted_count = 0u;
var temporal_index = ~0u;
for (var tap = 0u; tap <= parameters.spatial_taps; tap += 1u) {
var other_pixel = prev_pixel;
if (tap != 0u) {
let r0 = max(prev_pixel - vec2<i32>(parameters.spatial_radius), vec2<i32>(0));
let r1 = min(prev_pixel + vec2<i32>(parameters.spatial_radius + 1), vec2<i32>(prev_camera.target_size));
let num_temporal_candidates = parameters.temporal_tap * FACTOR_TEMPORAL_CANDIDATES;
let num_candidates = num_temporal_candidates + parameters.spatial_taps * FACTOR_SPATIAL_CANDIDATES;
let max_samples = min(MAX_RESERVOIRS, 1u + parameters.spatial_taps);

for (var tap = 0u; tap <= num_candidates && accepted_count < max_samples; tap += 1u) {
var other_pixel = center_pixel;
if (tap < num_temporal_candidates) {
if (temporal_index < tap) {
continue;
}
let mask = vec2<u32>(tap) & vec2<u32>(1u, 2u);
other_pixel = select(center_pixel, further_pixel, mask != vec2<u32>(0u));
} else {
let r0 = max(center_pixel - vec2<i32>(parameters.spatial_radius), vec2<i32>(0));
let r1 = min(center_pixel + vec2<i32>(parameters.spatial_radius + 1), vec2<i32>(prev_camera.target_size));
other_pixel = vec2<i32>(mix(vec2<f32>(r0), vec2<f32>(r1), vec2<f32>(random_gen(rng), random_gen(rng))));
} else if (parameters.temporal_tap == 0u)
{
continue;
let diff = other_pixel - center_pixel;
if (dot(diff, diff) < MIN_SPATIAL_REUSE_DISTANCE) {
continue;
}
}

let other_index = get_reservoir_index(other_pixel, prev_camera);
Expand All @@ -407,27 +414,35 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
continue;
}

if (tap == 0u) {
if (tap < num_temporal_candidates) {
temporal_index = accepted_count;
}
accepted_reservoir_indices[accepted_count] = other_index;
if (accepted_count < MAX_RESERVOIRS) {
accepted_count += 1u;
accepted_count += 1u;
}

if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SampleReuse) {
var color = vec4<f32>(0.0);
for (var i = 0u; i < min(3u, accepted_count); i += 1u) {
color[i] = 1.0;
}
textureStore(out_debug, pixel, color);
}

// Next, evaluate the MIS of each of the samples versus the canonical one.
var reservoir = LiveReservoir();
var shaded_color = vec3<f32>(0.0);
var mis_canonical = BASE_CANONICAL_MIS;
var color_and_weight = vec4<f32>(0.0);
let mis_scale = 1.0 / (f32(accepted_count) + parameters.defensive_mis);
var mis_canonical = select(mis_scale * parameters.defensive_mis, 1.0, accepted_count == 0u || parameters.use_pairwise_mis == 0u);
let inv_count = 1.0 / f32(accepted_count);

for (var rid = 0u; rid < accepted_count; rid += 1u) {
let neighbor_index = accepted_reservoir_indices[rid];
let neighbor = prev_reservoirs[neighbor_index];

let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, rid == temporal_index);
var other: LiveReservoir;
if (PAIRWISE_MIS) {
if (parameters.use_pairwise_mis != 0u) {
let neighbor_pixel = get_pixel_from_reservoir_index(neighbor_index, prev_camera);
let neighbor_history = min(neighbor.confidence, f32(max_history));
{ // scoping this to hint the register allocation
Expand All @@ -437,30 +452,20 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS

let t_canonical_at_neighbor = estimate_target_score_with_occlusion(
neighbor_surface, neighbor_position, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len);
let mis_sub_canonical = balance_heuristic(
t_canonical_at_neighbor.score, canonical.selected_target_score,
neighbor_history * f32(accepted_count), canonical.history);
mis_canonical += 1.0 - mis_sub_canonical.weight;
let r_canonical = ratio(canonical.history * canonical.selected_target_score * inv_count, neighbor_history * t_canonical_at_neighbor.score);
mis_canonical += mis_scale * r_canonical;
}

// Notes about t_neighbor_at_neighbor:
// 1. we assume lights aren't moving. Technically we should check if the
// target light has moved, and re-evaluate the occlusion.
// 2. we can use the cached target score, and there is no use of the target color
//let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir);
let t_neighbor_at_canonical = estimate_target_score_with_occlusion(
surface, position, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len);
let mis_neighbor = balance_heuristic(
neighbor.target_score, t_neighbor_at_canonical.score,
neighbor_history * f32(accepted_count), canonical.history);
let r_neighbor = ratio(neighbor_history * neighbor.target_score, canonical.history * t_neighbor_at_canonical.score * inv_count);
let mis_neighbor = mis_scale * r_neighbor;

other.history = neighbor_history;
other.selected_light_index = neighbor.light_index;
other.selected_uv = neighbor.light_uv;
other.selected_target_score = t_neighbor_at_canonical.score;
other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight;
//Note: should be needed according to the paper
// other.history *= min(mis_neighbor.history, mis_sub_canonical.history);
other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor;
other.radiance = t_neighbor_at_canonical.color;
} else {
other = unpack_reservoir(neighbor, max_history);
Expand All @@ -478,17 +483,16 @@ fn compute_restir(surface: Surface, pixel: vec2<i32>, rng: ptr<function, RandomS
}

// Finally, merge in the canonical sample
if (PAIRWISE_MIS) {
if (parameters.use_pairwise_mis != 0) {
canonical.weight_sum *= mis_canonical / canonical.history;
}
if (DECOUPLED_SHADING) {
//FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS?
let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1);
let cw = canonical.weight_sum / max(canonical.selected_target_score, 0.1);
color_and_weight += canonical.weight_sum * vec4<f32>(cw * canonical.radiance, 1.0);
}
merge_reservoir(&reservoir, canonical, random_gen(rng));

let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS);
let effective_history = select(reservoir.history, 1.0, parameters.use_pairwise_mis != 0);
let stored = pack_reservoir_detail(reservoir, effective_history);
reservoirs[pixel_index] = stored;
var ro = RestirOutput();
Expand Down
Loading
Loading