diff --git a/Cargo.lock b/Cargo.lock index 528d6909..fae006ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4012,6 +4012,7 @@ dependencies = [ "peniko", "raw-window-handle", "skrifa", + "slotmap", "vello_encoding", "wgpu", "wgpu-profiler", diff --git a/Cargo.toml b/Cargo.toml index a88304ef..21196fa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,7 @@ log = { workspace = true } raw-window-handle = { workspace = true } futures-intrusive = { workspace = true } wgpu-profiler = { workspace = true, optional = true } +slotmap = { workspace = true } [workspace.lints] clippy.doc_markdown = "warn" @@ -77,6 +78,7 @@ skrifa = "0.19.0" peniko = "0.1.0" futures-intrusive = "0.5.0" raw-window-handle = "0.6.0" +slotmap = "1.0.7" # NOTE: Make sure to keep this in sync with the version badge in README.md wgpu = { version = "0.19.3" } diff --git a/src/lib.rs b/src/lib.rs index ed56b5ff..e519b53e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,6 +87,7 @@ mod cpu_dispatch; mod cpu_shader; mod recording; mod render; +mod render_graph; mod scene; mod shaders; #[cfg(feature = "wgpu")] @@ -100,6 +101,10 @@ pub use peniko; /// 2D geometry, with a focus on curves. pub use peniko::kurbo; +use render_graph::{ + passes::{VelloCoarse, VelloFine}, + RenderGraph, ResourceManager, +}; #[doc(hidden)] pub use skrifa; @@ -286,11 +291,32 @@ impl Renderer { texture: &TextureView, params: &RenderParams, ) -> Result<()> { - let (recording, target) = render::render_full(scene, &self.shaders, params); - let external_resources = [ExternalResource::Image( - *target.as_image().unwrap(), - texture, - )]; + let mut render_graph = RenderGraph::new(); + + let mut resources = ResourceManager::new(); + let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8); + let out_image = resources.import_image(out_image); + + let coarse = render_graph.insert_pass((), |()| VelloCoarse {}); + let _fine = render_graph.insert_pass((coarse,), move |(coarse,)| VelloFine { + config_buf: coarse.config_buf, + tile_buf: coarse.tile_buf, + segments_buf: coarse.segments_buf, + ptcl_buf: coarse.ptcl_buf, + gradient_image: coarse.gradient_image, + info_bin_data_buf: coarse.info_bin_data_buf, + image_atlas: coarse.image_atlas, + out_image, + fine_workgroup_size: coarse.fine_workgroup_size, + }); + + let Some(recording) = + render_graph.process(resources, params, &self.shaders, scene.encoding(), false) + else { + panic!("Cyclic Render Graph"); + }; + + let external_resources = [ExternalResource::Image(out_image.into(), texture)]; self.engine.run_recording( device, queue, diff --git a/src/recording.rs b/src/recording.rs index 0adb0f34..3769ef93 100644 --- a/src/recording.rs +++ b/src/recording.rs @@ -148,10 +148,9 @@ impl Recording { pub fn dispatch(&mut self, shader: ShaderId, wg_size: (u32, u32, u32), resources: R) where - R: IntoIterator, - R::Item: Into, + R: IntoResourceProxies, { - let r = resources.into_iter().map(|r| r.into()).collect(); + let r = resources.into_resource_proxies(); self.push(Command::Dispatch(shader, wg_size, r)); } @@ -168,10 +167,9 @@ impl Recording { offset: u64, resources: R, ) where - R: IntoIterator, - R::Item: Into, + R: IntoResourceProxies, { - let r = resources.into_iter().map(|r| r.into()).collect(); + let r = resources.into_resource_proxies(); self.push(Command::DispatchIndirect(shader, buf, offset, r)); } @@ -206,6 +204,11 @@ impl Recording { } } + /// Moves all the commands of other into self, leaving other empty. + pub fn append(&mut self, other: &mut Recording) { + self.commands.append(&mut other.commands); + } + /// Returns a [`Vec`] containing all the [`Command`]s in order. pub fn into_commands(self) -> Vec { self.commands @@ -277,3 +280,33 @@ impl From for ResourceProxy { Self::Image(value) } } + +pub trait IntoResourceProxies { + fn into_resource_proxies(self) -> Vec; +} + +macro_rules! impl_into_resource_proxies { + ( $(($generic:ident, $index:tt))+ ) => { + impl<$($generic: Into),+> IntoResourceProxies for ($($generic,)+) { + #[inline] + fn into_resource_proxies(self) -> Vec { + vec![ + $( + self.$index.into(), + )+ + ] + } + } + }; +} + +impl_into_resource_proxies!((A, 0)); +impl_into_resource_proxies!((A, 0)(B, 1)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)(H, 7)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)(H, 7)(I, 8)); +impl_into_resource_proxies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)(H, 7)(I, 8)(J, 9)); diff --git a/src/render.rs b/src/render.rs index 279f1451..86990d9b 100644 --- a/src/render.rs +++ b/src/render.rs @@ -23,14 +23,14 @@ pub struct Render { struct FineResources { aa_config: AaConfig, - config_buf: ResourceProxy, - bump_buf: ResourceProxy, - tile_buf: ResourceProxy, - segments_buf: ResourceProxy, - ptcl_buf: ResourceProxy, - gradient_image: ResourceProxy, - info_bin_data_buf: ResourceProxy, - image_atlas: ResourceProxy, + config_buf: BufferProxy, + bump_buf: BufferProxy, // this gets freed and afterwards put in this resources, why? + tile_buf: BufferProxy, + segments_buf: BufferProxy, + ptcl_buf: BufferProxy, + gradient_image: ImageProxy, + info_bin_data_buf: BufferProxy, + image_atlas: ImageProxy, out_image: ImageProxy, } @@ -90,19 +90,15 @@ impl Render { use vello_encoding::{RenderConfig, Resolver}; let mut recording = Recording::default(); + let mut resolver = Resolver::new(); let mut packed = vec![]; let (layout, ramps, images) = resolver.resolve(encoding, &mut packed); let gradient_image = if ramps.height == 0 { - ResourceProxy::new_image(1, 1, ImageFormat::Rgba8) + ImageProxy::new(1, 1, ImageFormat::Rgba8) } else { let data: &[u8] = bytemuck::cast_slice(ramps.data); - ResourceProxy::Image(recording.upload_image( - ramps.width, - ramps.height, - ImageFormat::Rgba8, - data, - )) + recording.upload_image(ramps.width, ramps.height, ImageFormat::Rgba8, data) }; let image_atlas = if images.images.is_empty() { ImageProxy::new(1, 1, ImageFormat::Rgba8) @@ -125,20 +121,17 @@ impl Render { let buffer_sizes = &cpu_config.buffer_sizes; let wg_counts = &cpu_config.workgroup_counts; - let scene_buf = ResourceProxy::Buffer(recording.upload("scene", packed)); - let config_buf = ResourceProxy::Buffer( - recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)), - ); - let info_bin_data_buf = ResourceProxy::new_buf( + let scene_buf = recording.upload("scene", packed); + let config_buf = recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)); + let info_bin_data_buf = BufferProxy::new( buffer_sizes.bin_data.size_in_bytes() as u64, "info_bin_data_buf", ); - let tile_buf = - ResourceProxy::new_buf(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf"); + let tile_buf = BufferProxy::new(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf"); let segments_buf = - ResourceProxy::new_buf(buffer_sizes.segments.size_in_bytes().into(), "segments_buf"); - let ptcl_buf = ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf"); - let reduced_buf = ResourceProxy::new_buf( + BufferProxy::new(buffer_sizes.segments.size_in_bytes().into(), "segments_buf"); + let ptcl_buf = BufferProxy::new(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf"); + let reduced_buf = BufferProxy::new( buffer_sizes.path_reduced.size_in_bytes().into(), "reduced_buf", ); @@ -146,35 +139,35 @@ impl Render { recording.dispatch( shaders.pathtag_reduce, wg_counts.path_reduce, - [config_buf, scene_buf, reduced_buf], + (config_buf, scene_buf, reduced_buf), ); let mut pathtag_parent = reduced_buf; let mut large_pathtag_bufs = None; let use_large_path_scan = wg_counts.use_large_path_scan && !shaders.pathtag_is_cpu; if use_large_path_scan { - let reduced2_buf = ResourceProxy::new_buf( + let reduced2_buf = BufferProxy::new( buffer_sizes.path_reduced2.size_in_bytes().into(), "reduced2_buf", ); recording.dispatch( shaders.pathtag_reduce2, wg_counts.path_reduce2, - [reduced_buf, reduced2_buf], + (reduced_buf, reduced2_buf), ); - let reduced_scan_buf = ResourceProxy::new_buf( + let reduced_scan_buf = BufferProxy::new( buffer_sizes.path_reduced_scan.size_in_bytes().into(), "reduced_scan_buf", ); recording.dispatch( shaders.pathtag_scan1, wg_counts.path_scan1, - [reduced_buf, reduced2_buf, reduced_scan_buf], + (reduced_buf, reduced2_buf, reduced_scan_buf), ); pathtag_parent = reduced_scan_buf; large_pathtag_bufs = Some((reduced2_buf, reduced_scan_buf)); } - let tagmonoid_buf = ResourceProxy::new_buf( + let tagmonoid_buf = BufferProxy::new( buffer_sizes.path_monoids.size_in_bytes().into(), "tagmonoid_buf", ); @@ -186,60 +179,58 @@ impl Render { recording.dispatch( pathtag_scan, wg_counts.path_scan, - [config_buf, scene_buf, pathtag_parent, tagmonoid_buf], + (config_buf, scene_buf, pathtag_parent, tagmonoid_buf), ); - recording.free_resource(reduced_buf); + recording.free_resource(reduced_buf.into()); if let Some((reduced2, reduced_scan)) = large_pathtag_bufs { - recording.free_resource(reduced2); - recording.free_resource(reduced_scan); + recording.free_resource(reduced2.into()); + recording.free_resource(reduced_scan.into()); } - let path_bbox_buf = ResourceProxy::new_buf( + let path_bbox_buf = BufferProxy::new( buffer_sizes.path_bboxes.size_in_bytes().into(), "path_bbox_buf", ); recording.dispatch( shaders.bbox_clear, wg_counts.bbox_clear, - [config_buf, path_bbox_buf], + (config_buf, path_bbox_buf), ); let bump_buf = BufferProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf"); recording.clear_all(bump_buf); - let bump_buf = ResourceProxy::Buffer(bump_buf); - let lines_buf = - ResourceProxy::new_buf(buffer_sizes.lines.size_in_bytes().into(), "lines_buf"); + let lines_buf = BufferProxy::new(buffer_sizes.lines.size_in_bytes().into(), "lines_buf"); recording.dispatch( shaders.flatten, wg_counts.flatten, - [ + ( config_buf, scene_buf, tagmonoid_buf, path_bbox_buf, bump_buf, lines_buf, - ], + ), ); - let draw_reduced_buf = ResourceProxy::new_buf( + let draw_reduced_buf = BufferProxy::new( buffer_sizes.draw_reduced.size_in_bytes().into(), "draw_reduced_buf", ); recording.dispatch( shaders.draw_reduce, wg_counts.draw_reduce, - [config_buf, scene_buf, draw_reduced_buf], + (config_buf, scene_buf, draw_reduced_buf), ); - let draw_monoid_buf = ResourceProxy::new_buf( + let draw_monoid_buf = BufferProxy::new( buffer_sizes.draw_monoids.size_in_bytes().into(), "draw_monoid_buf", ); - let clip_inp_buf = ResourceProxy::new_buf( + let clip_inp_buf = BufferProxy::new( buffer_sizes.clip_inps.size_in_bytes().into(), "clip_inp_buf", ); recording.dispatch( shaders.draw_leaf, wg_counts.draw_leaf, - [ + ( config_buf, scene_buf, draw_reduced_buf, @@ -247,12 +238,12 @@ impl Render { draw_monoid_buf, info_bin_data_buf, clip_inp_buf, - ], + ), ); - recording.free_resource(draw_reduced_buf); + recording.free_resource(draw_reduced_buf.into()); let clip_el_buf = - ResourceProxy::new_buf(buffer_sizes.clip_els.size_in_bytes().into(), "clip_el_buf"); - let clip_bic_buf = ResourceProxy::new_buf( + BufferProxy::new(buffer_sizes.clip_els.size_in_bytes().into(), "clip_el_buf"); + let clip_bic_buf = BufferProxy::new( buffer_sizes.clip_bics.size_in_bytes().into(), "clip_bic_buf", ); @@ -260,10 +251,10 @@ impl Render { recording.dispatch( shaders.clip_reduce, wg_counts.clip_reduce, - [clip_inp_buf, path_bbox_buf, clip_bic_buf, clip_el_buf], + (clip_inp_buf, path_bbox_buf, clip_bic_buf, clip_el_buf), ); } - let clip_bbox_buf = ResourceProxy::new_buf( + let clip_bbox_buf = BufferProxy::new( buffer_sizes.clip_bboxes.size_in_bytes().into(), "clip_bbox_buf", ); @@ -271,7 +262,7 @@ impl Render { recording.dispatch( shaders.clip_leaf, wg_counts.clip_leaf, - [ + ( config_buf, clip_inp_buf, path_bbox_buf, @@ -279,24 +270,24 @@ impl Render { clip_el_buf, draw_monoid_buf, clip_bbox_buf, - ], + ), ); } - recording.free_resource(clip_inp_buf); - recording.free_resource(clip_bic_buf); - recording.free_resource(clip_el_buf); - let draw_bbox_buf = ResourceProxy::new_buf( + recording.free_resource(clip_inp_buf.into()); + recording.free_resource(clip_bic_buf.into()); + recording.free_resource(clip_el_buf.into()); + let draw_bbox_buf = BufferProxy::new( buffer_sizes.draw_bboxes.size_in_bytes().into(), "draw_bbox_buf", ); - let bin_header_buf = ResourceProxy::new_buf( + let bin_header_buf = BufferProxy::new( buffer_sizes.bin_headers.size_in_bytes().into(), "bin_header_buf", ); recording.dispatch( shaders.binning, wg_counts.binning, - [ + ( config_buf, draw_monoid_buf, path_bbox_buf, @@ -305,11 +296,11 @@ impl Render { bump_buf, info_bin_data_buf, bin_header_buf, - ], + ), ); - recording.free_resource(draw_monoid_buf); - recording.free_resource(path_bbox_buf); - recording.free_resource(clip_bbox_buf); + recording.free_resource(draw_monoid_buf.into()); + recording.free_resource(path_bbox_buf.into()); + recording.free_resource(clip_bbox_buf.into()); // Note: this only needs to be rounded up because of the workaround to store the tile_offset // in storage rather than workgroup memory. let path_buf = @@ -317,17 +308,17 @@ impl Render { recording.dispatch( shaders.tile_alloc, wg_counts.tile_alloc, - [ + ( config_buf, scene_buf, draw_bbox_buf, bump_buf, path_buf, tile_buf, - ], + ), ); - recording.free_resource(draw_bbox_buf); - recording.free_resource(tagmonoid_buf); + recording.free_resource(draw_bbox_buf.into()); + recording.free_resource(tagmonoid_buf.into()); let indirect_count_buf = BufferProxy::new( buffer_sizes.indirect_count.size_in_bytes().into(), "indirect_count", @@ -335,9 +326,9 @@ impl Render { recording.dispatch( shaders.path_count_setup, wg_counts.path_count_setup, - [bump_buf, indirect_count_buf.into()], + (bump_buf, indirect_count_buf), ); - let seg_counts_buf = ResourceProxy::new_buf( + let seg_counts_buf = BufferProxy::new( buffer_sizes.seg_counts.size_in_bytes().into(), "seg_counts_buf", ); @@ -345,24 +336,24 @@ impl Render { shaders.path_count, indirect_count_buf, 0, - [ + ( config_buf, bump_buf, lines_buf, path_buf, tile_buf, seg_counts_buf, - ], + ), ); recording.dispatch( shaders.backdrop, wg_counts.backdrop, - [config_buf, path_buf, tile_buf], + (config_buf, path_buf, tile_buf), ); recording.dispatch( shaders.coarse, wg_counts.coarse, - [ + ( config_buf, scene_buf, draw_monoid_buf, @@ -372,33 +363,39 @@ impl Render { tile_buf, bump_buf, ptcl_buf, - ], + ), ); recording.dispatch( shaders.path_tiling_setup, wg_counts.path_tiling_setup, - [bump_buf, indirect_count_buf.into(), ptcl_buf], + (bump_buf, indirect_count_buf, ptcl_buf), ); recording.dispatch_indirect( shaders.path_tiling, indirect_count_buf, 0, - [ + ( bump_buf, seg_counts_buf, lines_buf, path_buf, tile_buf, segments_buf, - ], + ), ); recording.free_buffer(indirect_count_buf); - recording.free_resource(seg_counts_buf); - recording.free_resource(lines_buf); - recording.free_resource(scene_buf); - recording.free_resource(draw_monoid_buf); - recording.free_resource(bin_header_buf); + recording.free_resource(seg_counts_buf.into()); + recording.free_resource(lines_buf.into()); + recording.free_resource(scene_buf.into()); + recording.free_resource(draw_monoid_buf.into()); + recording.free_resource(bin_header_buf.into()); recording.free_resource(path_buf); + + if robust { + recording.download(bump_buf); + } + recording.free_resource(bump_buf.into()); + let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8); self.fine_wg_count = Some(wg_counts.fine); self.fine_resources = Some(FineResources { @@ -410,13 +407,10 @@ impl Render { ptcl_buf, gradient_image, info_bin_data_buf, - image_atlas: ResourceProxy::Image(image_atlas), + image_atlas, out_image, }); - if robust { - recording.download(*bump_buf.as_buf().unwrap()); - } - recording.free_resource(bump_buf); + recording } @@ -431,15 +425,15 @@ impl Render { .fine_area .expect("shaders not configured to support AA mode: area"), fine_wg_count, - [ + ( fine.config_buf, fine.segments_buf, fine.ptcl_buf, fine.info_bin_data_buf, - ResourceProxy::Image(fine.out_image), + fine.out_image, fine.gradient_image, fine.image_atlas, - ], + ), ); } _ => { @@ -464,26 +458,26 @@ impl Render { recording.dispatch( fine_shader, fine_wg_count, - [ + ( fine.config_buf, fine.segments_buf, fine.ptcl_buf, fine.info_bin_data_buf, - ResourceProxy::Image(fine.out_image), + fine.out_image, fine.gradient_image, fine.image_atlas, self.mask_buf.unwrap(), - ], + ), ); } } - recording.free_resource(fine.config_buf); - recording.free_resource(fine.tile_buf); - recording.free_resource(fine.segments_buf); - recording.free_resource(fine.ptcl_buf); - recording.free_resource(fine.gradient_image); - recording.free_resource(fine.image_atlas); - recording.free_resource(fine.info_bin_data_buf); + recording.free_resource(fine.config_buf.into()); + recording.free_resource(fine.tile_buf.into()); + recording.free_resource(fine.segments_buf.into()); + recording.free_resource(fine.ptcl_buf.into()); + recording.free_resource(fine.gradient_image.into()); + recording.free_resource(fine.image_atlas.into()); + recording.free_resource(fine.info_bin_data_buf.into()); // TODO: make mask buf persistent if let Some(mask_buf) = self.mask_buf.take() { recording.free_resource(mask_buf); @@ -499,12 +493,6 @@ impl Render { } pub fn bump_buf(&self) -> BufferProxy { - *self - .fine_resources - .as_ref() - .unwrap() - .bump_buf - .as_buf() - .unwrap() + self.fine_resources.as_ref().unwrap().bump_buf } } diff --git a/src/render_graph/mod.rs b/src/render_graph/mod.rs new file mode 100644 index 00000000..bc90a048 --- /dev/null +++ b/src/render_graph/mod.rs @@ -0,0 +1,307 @@ +use std::{any::Any, marker::PhantomData}; + +use slotmap::{new_key_type, SecondaryMap, SlotMap}; +use vello_encoding::Encoding; + +use crate::{BufferProxy, FullShaders, ImageProxy, Recording, RenderParams, ResourceProxy}; + +use self::passes::RenderPass; + +pub mod passes; + +new_key_type! { + pub struct PassId; +} + +pub struct Pass(PassId, PhantomData

); + +pub struct RenderGraph { + nodes: SlotMap>, + dependencies: SecondaryMap>, + dependants: SecondaryMap>, +} + +impl RenderGraph { + pub fn new() -> Self { + RenderGraph { + nodes: SlotMap::with_key(), + dependencies: SecondaryMap::new(), + dependants: SecondaryMap::new(), + } + } + + pub fn insert_pass< + D: IntoPassDependencies + 'static, + P: RenderPass + 'static, + F: Fn(D::Outputs) -> P + 'static, + >( + &mut self, + dependencies: D, + pass_builder: F, + ) -> Pass

{ + let erased: PhantomPass = PhantomPass { + f: pass_builder, + phantom: PhantomData, + }; + let id = self.nodes.insert(Box::new(erased)); + self.dependants.insert(id, vec![]); + let deps = dependencies.into_pass_dependencies(); + for dep in &deps { + self.dependants[*dep].push(id); + } + self.dependencies.insert(id, deps); + + Pass(id, PhantomData) + } + + pub fn process( + &self, + mut resources: ResourceManager, + params: &RenderParams, + shaders: &FullShaders, + encoding: &Encoding, + robust: bool, + ) -> Option { + let mut recording = Recording::default(); + + let mut stack = Vec::with_capacity(self.nodes.len()); + let mut counter = SecondaryMap::with_capacity(self.nodes.len()); + let mut result = Vec::with_capacity(self.nodes.len()); + + for (id, d) in &self.dependencies { + counter.insert(id, d.len()); + if d.len() == 0 { + stack.push(id); + } + } + + while let Some(id) = stack.pop() { + result.push(id); + for &dependant in &self.dependants[id] { + counter[dependant] -= 1; + if counter[dependant] == 0 { + stack.push(dependant); + } + } + } + + if result.len() != self.nodes.len() { + return None; + } + + let mut outputs: SecondaryMap> = SecondaryMap::new(); + + for pass in result { + let mut pass_recording = unsafe { + self.nodes[pass].record( + pass, + &self.dependencies[pass], + &mut outputs, + PassContext { + resources: &mut resources, + params, + shaders, + encoding, + robust, + }, + ) + }; + recording.append(&mut pass_recording); + } + + for (_, resource) in resources.resources.into_iter() { + match resource { + ManagableResource::Managed { proxy } => { + recording.free_resource(proxy); + } + ManagableResource::Imported => {} + } + } + + Some(recording) + } +} + +new_key_type! { + pub struct ResourceId; +} + +#[derive(Clone, Copy)] +pub struct Handle { + id: ResourceId, + proxy: T, +} + +impl Into for Handle { + fn into(self) -> ResourceProxy { + ResourceProxy::Image(self.proxy) + } +} +impl Into for Handle { + fn into(self) -> ImageProxy { + self.proxy + } +} +impl Into for Handle { + fn into(self) -> ResourceProxy { + ResourceProxy::Buffer(self.proxy) + } +} +impl Into for Handle { + fn into(self) -> BufferProxy { + self.proxy + } +} + +enum ManagableResource { + Managed { proxy: ResourceProxy }, + Imported, +} + +pub struct ResourceManager { + resources: SlotMap, +} + +impl ResourceManager { + pub fn new() -> Self { + Self { + resources: SlotMap::with_key(), + } + } + + pub fn managed_image(&mut self, image: ImageProxy) -> Handle { + let id = self.resources.insert(ManagableResource::Managed { + proxy: image.into(), + }); + Handle { id, proxy: image } + } + + pub fn import_image(&mut self, image: ImageProxy) -> Handle { + let id = self.resources.insert(ManagableResource::Imported); + Handle { id, proxy: image } + } + + pub fn managed_buffer(&mut self, buffer: BufferProxy) -> Handle { + let id = self.resources.insert(ManagableResource::Managed { + proxy: buffer.into(), + }); + Handle { id, proxy: buffer } + } + + pub fn import_buffer(&mut self, buffer: BufferProxy) -> Handle { + let id = self.resources.insert(ManagableResource::Imported); + Handle { id, proxy: buffer } + } +} + +pub trait IntoPassDependencies { + type Outputs: Clone + Copy; + + fn into_pass_dependencies(self) -> Vec; + + // SAFETY: assoc_data should match in length with outputs, returned any should have the type of the output + unsafe fn outputs_map<'d, 'a, OD, OF>(assoc_data: &'d [OD], f: OF) -> Self::Outputs + where + OF: Fn(&'d OD) -> &'a dyn Any; +} + +impl IntoPassDependencies for () { + type Outputs = (); + + fn into_pass_dependencies(self) -> Vec { + vec![] + } + + unsafe fn outputs_map<'d, 'a, OD, OF>(_assoc_data: &'d [OD], _f: OF) -> Self::Outputs + where + OF: Fn(&'d OD) -> &'a dyn Any, + { + () + } +} + +macro_rules! impl_into_pass_dependencies { + ( $(($generic:ident, $index:tt))+ ) => { + impl<$($generic: RenderPass),+> IntoPassDependencies for ($(Pass<$generic>,)+) { + type Outputs = ($($generic::Output,)+); + + #[inline] + fn into_pass_dependencies(self) -> Vec { + vec![ + $( + self.$index .0, + )+ + ] + } + + unsafe fn outputs_map<'d, 'a, OD, OF>(assoc_data: &'d [OD], f: OF) -> Self::Outputs + where + OF: Fn(&'d OD) -> &'a dyn Any, + { + ($( + { + let any = f(&assoc_data[$index]); + unsafe { + any.downcast_ref::<$generic::Output>().unwrap_unchecked().clone() + } + }, + )+) + } + } + }; +} + +impl_into_pass_dependencies!((A, 0)); +impl_into_pass_dependencies!((A, 0)(B, 1)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)(H, 7)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)(H, 7)(I, 8)); +impl_into_pass_dependencies!((A, 0)(B, 1)(C, 2)(D, 3)(E, 4)(F, 5)(G, 6)(H, 7)(I, 8)(J, 9)); + +struct PhantomPass { + f: F, + phantom: PhantomData<(D, P)>, +} + +trait ErasedPass { + // SAFETY: make sure that all dependencies have already run and that the ids are valid and in the right order! + unsafe fn record( + &self, + id: PassId, + deps: &[PassId], + outputs: &mut SecondaryMap>, + cx: PassContext<'_>, + ) -> Recording; +} + +impl ErasedPass for PhantomPass +where + F: Fn(D::Outputs) -> P, +{ + unsafe fn record( + &self, + id: PassId, + deps: &[PassId], + outputs: &mut SecondaryMap>, + cx: PassContext<'_>, + ) -> Recording { + // SAFETY: user assures that everything is correct. + let dep_outputs = D::outputs_map(deps, |dep| outputs[*dep].as_ref()); + let pass = (self.f)(dep_outputs); + let (recording, output) = pass.record(cx); + outputs.insert(id, Box::new(output)); + recording + } +} + +pub struct PassContext<'c> { + pub resources: &'c mut ResourceManager, + pub params: &'c RenderParams, + pub shaders: &'c FullShaders, + pub encoding: &'c Encoding, + pub robust: bool, +} diff --git a/src/render_graph/passes/coarse.rs b/src/render_graph/passes/coarse.rs new file mode 100644 index 00000000..d47e5708 --- /dev/null +++ b/src/render_graph/passes/coarse.rs @@ -0,0 +1,357 @@ +use vello_encoding::{RenderConfig, Resolver, WorkgroupSize}; + +use crate::{ + render_graph::{Handle, PassContext}, + BufferProxy, ImageFormat, ImageProxy, Recording, ResourceProxy, +}; + +use super::RenderPass; + +pub struct VelloCoarse {} + +#[derive(Clone, Copy)] +pub struct CoarseOutput { + pub config_buf: Handle, + pub tile_buf: Handle, + pub segments_buf: Handle, + pub ptcl_buf: Handle, + pub gradient_image: Handle, + pub info_bin_data_buf: Handle, + pub image_atlas: Handle, + + pub fine_workgroup_size: WorkgroupSize, +} + +impl RenderPass for VelloCoarse { + type Output = CoarseOutput; + + fn record(self, cx: PassContext<'_>) -> (Recording, Self::Output) + where + Self: Sized, + { + let mut recording = Recording::default(); + + let mut resolver = Resolver::new(); + let mut packed = vec![]; + let (layout, ramps, images) = resolver.resolve(cx.encoding, &mut packed); + let gradient_image = if ramps.height == 0 { + ImageProxy::new(1, 1, ImageFormat::Rgba8) + } else { + let data: &[u8] = bytemuck::cast_slice(ramps.data); + recording.upload_image(ramps.width, ramps.height, ImageFormat::Rgba8, data) + }; + let image_atlas = if images.images.is_empty() { + ImageProxy::new(1, 1, ImageFormat::Rgba8) + } else { + ImageProxy::new(images.width, images.height, ImageFormat::Rgba8) + }; + for image in images.images { + recording.write_image( + image_atlas, + image.1, + image.2, + image.0.width, + image.0.height, + image.0.data.data(), + ); + } + + let cpu_config = RenderConfig::new( + &layout, + cx.params.width, + cx.params.height, + &cx.params.base_color, + ); + let buffer_sizes = &cpu_config.buffer_sizes; + let wg_counts = &cpu_config.workgroup_counts; + + let scene_buf = recording.upload("scene", packed); + let config_buf = recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)); + let info_bin_data_buf = BufferProxy::new( + buffer_sizes.bin_data.size_in_bytes() as u64, + "info_bin_data_buf", + ); + let tile_buf = BufferProxy::new(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf"); + let segments_buf = + BufferProxy::new(buffer_sizes.segments.size_in_bytes().into(), "segments_buf"); + let ptcl_buf = BufferProxy::new(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf"); + let reduced_buf = BufferProxy::new( + buffer_sizes.path_reduced.size_in_bytes().into(), + "reduced_buf", + ); + // TODO: really only need pathtag_wgs - 1 + recording.dispatch( + cx.shaders.pathtag_reduce, + wg_counts.path_reduce, + (config_buf, scene_buf, reduced_buf), + ); + let mut pathtag_parent = reduced_buf; + let mut large_pathtag_bufs = None; + let use_large_path_scan = wg_counts.use_large_path_scan && !cx.shaders.pathtag_is_cpu; + if use_large_path_scan { + let reduced2_buf = BufferProxy::new( + buffer_sizes.path_reduced2.size_in_bytes().into(), + "reduced2_buf", + ); + recording.dispatch( + cx.shaders.pathtag_reduce2, + wg_counts.path_reduce2, + (reduced_buf, reduced2_buf), + ); + let reduced_scan_buf = BufferProxy::new( + buffer_sizes.path_reduced_scan.size_in_bytes().into(), + "reduced_scan_buf", + ); + recording.dispatch( + cx.shaders.pathtag_scan1, + wg_counts.path_scan1, + (reduced_buf, reduced2_buf, reduced_scan_buf), + ); + pathtag_parent = reduced_scan_buf; + large_pathtag_bufs = Some((reduced2_buf, reduced_scan_buf)); + } + + let tagmonoid_buf = BufferProxy::new( + buffer_sizes.path_monoids.size_in_bytes().into(), + "tagmonoid_buf", + ); + let pathtag_scan = if use_large_path_scan { + cx.shaders.pathtag_scan_large + } else { + cx.shaders.pathtag_scan + }; + recording.dispatch( + pathtag_scan, + wg_counts.path_scan, + (config_buf, scene_buf, pathtag_parent, tagmonoid_buf), + ); + recording.free_resource(reduced_buf.into()); + if let Some((reduced2, reduced_scan)) = large_pathtag_bufs { + recording.free_resource(reduced2.into()); + recording.free_resource(reduced_scan.into()); + } + let path_bbox_buf = BufferProxy::new( + buffer_sizes.path_bboxes.size_in_bytes().into(), + "path_bbox_buf", + ); + recording.dispatch( + cx.shaders.bbox_clear, + wg_counts.bbox_clear, + (config_buf, path_bbox_buf), + ); + let bump_buf = BufferProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf"); + recording.clear_all(bump_buf); + let lines_buf = BufferProxy::new(buffer_sizes.lines.size_in_bytes().into(), "lines_buf"); + recording.dispatch( + cx.shaders.flatten, + wg_counts.flatten, + ( + config_buf, + scene_buf, + tagmonoid_buf, + path_bbox_buf, + bump_buf, + lines_buf, + ), + ); + let draw_reduced_buf = BufferProxy::new( + buffer_sizes.draw_reduced.size_in_bytes().into(), + "draw_reduced_buf", + ); + recording.dispatch( + cx.shaders.draw_reduce, + wg_counts.draw_reduce, + (config_buf, scene_buf, draw_reduced_buf), + ); + let draw_monoid_buf = BufferProxy::new( + buffer_sizes.draw_monoids.size_in_bytes().into(), + "draw_monoid_buf", + ); + let clip_inp_buf = BufferProxy::new( + buffer_sizes.clip_inps.size_in_bytes().into(), + "clip_inp_buf", + ); + recording.dispatch( + cx.shaders.draw_leaf, + wg_counts.draw_leaf, + ( + config_buf, + scene_buf, + draw_reduced_buf, + path_bbox_buf, + draw_monoid_buf, + info_bin_data_buf, + clip_inp_buf, + ), + ); + recording.free_resource(draw_reduced_buf.into()); + let clip_el_buf = + BufferProxy::new(buffer_sizes.clip_els.size_in_bytes().into(), "clip_el_buf"); + let clip_bic_buf = BufferProxy::new( + buffer_sizes.clip_bics.size_in_bytes().into(), + "clip_bic_buf", + ); + if wg_counts.clip_reduce.0 > 0 { + recording.dispatch( + cx.shaders.clip_reduce, + wg_counts.clip_reduce, + (clip_inp_buf, path_bbox_buf, clip_bic_buf, clip_el_buf), + ); + } + let clip_bbox_buf = BufferProxy::new( + buffer_sizes.clip_bboxes.size_in_bytes().into(), + "clip_bbox_buf", + ); + if wg_counts.clip_leaf.0 > 0 { + recording.dispatch( + cx.shaders.clip_leaf, + wg_counts.clip_leaf, + ( + config_buf, + clip_inp_buf, + path_bbox_buf, + clip_bic_buf, + clip_el_buf, + draw_monoid_buf, + clip_bbox_buf, + ), + ); + } + recording.free_resource(clip_inp_buf.into()); + recording.free_resource(clip_bic_buf.into()); + recording.free_resource(clip_el_buf.into()); + let draw_bbox_buf = BufferProxy::new( + buffer_sizes.draw_bboxes.size_in_bytes().into(), + "draw_bbox_buf", + ); + let bin_header_buf = BufferProxy::new( + buffer_sizes.bin_headers.size_in_bytes().into(), + "bin_header_buf", + ); + recording.dispatch( + cx.shaders.binning, + wg_counts.binning, + ( + config_buf, + draw_monoid_buf, + path_bbox_buf, + clip_bbox_buf, + draw_bbox_buf, + bump_buf, + info_bin_data_buf, + bin_header_buf, + ), + ); + recording.free_resource(draw_monoid_buf.into()); + recording.free_resource(path_bbox_buf.into()); + recording.free_resource(clip_bbox_buf.into()); + // Note: this only needs to be rounded up because of the workaround to store the tile_offset + // in storage rather than workgroup memory. + let path_buf = + ResourceProxy::new_buf(buffer_sizes.paths.size_in_bytes().into(), "path_buf"); + recording.dispatch( + cx.shaders.tile_alloc, + wg_counts.tile_alloc, + ( + config_buf, + scene_buf, + draw_bbox_buf, + bump_buf, + path_buf, + tile_buf, + ), + ); + recording.free_resource(draw_bbox_buf.into()); + recording.free_resource(tagmonoid_buf.into()); + let indirect_count_buf = BufferProxy::new( + buffer_sizes.indirect_count.size_in_bytes().into(), + "indirect_count", + ); + recording.dispatch( + cx.shaders.path_count_setup, + wg_counts.path_count_setup, + (bump_buf, indirect_count_buf), + ); + let seg_counts_buf = BufferProxy::new( + buffer_sizes.seg_counts.size_in_bytes().into(), + "seg_counts_buf", + ); + recording.dispatch_indirect( + cx.shaders.path_count, + indirect_count_buf, + 0, + ( + config_buf, + bump_buf, + lines_buf, + path_buf, + tile_buf, + seg_counts_buf, + ), + ); + recording.dispatch( + cx.shaders.backdrop, + wg_counts.backdrop, + (config_buf, path_buf, tile_buf), + ); + recording.dispatch( + cx.shaders.coarse, + wg_counts.coarse, + ( + config_buf, + scene_buf, + draw_monoid_buf, + bin_header_buf, + info_bin_data_buf, + path_buf, + tile_buf, + bump_buf, + ptcl_buf, + ), + ); + recording.dispatch( + cx.shaders.path_tiling_setup, + wg_counts.path_tiling_setup, + (bump_buf, indirect_count_buf, ptcl_buf), + ); + recording.dispatch_indirect( + cx.shaders.path_tiling, + indirect_count_buf, + 0, + ( + bump_buf, + seg_counts_buf, + lines_buf, + path_buf, + tile_buf, + segments_buf, + ), + ); + recording.free_buffer(indirect_count_buf); + recording.free_resource(seg_counts_buf.into()); + recording.free_resource(lines_buf.into()); + recording.free_resource(scene_buf.into()); + recording.free_resource(draw_monoid_buf.into()); + recording.free_resource(bin_header_buf.into()); + recording.free_resource(path_buf); + + if cx.robust { + recording.download(bump_buf); + } + recording.free_resource(bump_buf.into()); + + ( + recording, + CoarseOutput { + config_buf: cx.resources.managed_buffer(config_buf), + tile_buf: cx.resources.managed_buffer(tile_buf), + segments_buf: cx.resources.managed_buffer(segments_buf), + ptcl_buf: cx.resources.managed_buffer(ptcl_buf), + gradient_image: cx.resources.managed_image(gradient_image), + info_bin_data_buf: cx.resources.managed_buffer(info_bin_data_buf), + image_atlas: cx.resources.managed_image(image_atlas), + fine_workgroup_size: wg_counts.fine, + }, + ) + } +} diff --git a/src/render_graph/passes/fine.rs b/src/render_graph/passes/fine.rs new file mode 100644 index 00000000..6530a44a --- /dev/null +++ b/src/render_graph/passes/fine.rs @@ -0,0 +1,90 @@ +use vello_encoding::{make_mask_lut, make_mask_lut_16, WorkgroupSize}; + +use crate::{ + render_graph::{Handle, PassContext}, + AaConfig, BufferProxy, ImageProxy, Recording, +}; + +use super::RenderPass; + +pub struct VelloFine { + pub config_buf: Handle, + pub tile_buf: Handle, + pub segments_buf: Handle, + pub ptcl_buf: Handle, + pub gradient_image: Handle, + pub info_bin_data_buf: Handle, + pub image_atlas: Handle, + + pub out_image: Handle, + + pub fine_workgroup_size: WorkgroupSize, +} + +impl RenderPass for VelloFine { + type Output = (); + + fn record(self, cx: PassContext<'_>) -> (Recording, Self::Output) { + let mut recording = Recording::default(); + + match cx.params.antialiasing_method { + AaConfig::Area => { + recording.dispatch( + cx.shaders + .fine_area + .expect("shaders not configured to support AA mode: area"), + self.fine_workgroup_size, + ( + self.config_buf, + self.segments_buf, + self.ptcl_buf, + self.info_bin_data_buf, + self.out_image, + self.gradient_image, + self.image_atlas, + ), + ); + } + _ => { + let mask_lut = match cx.params.antialiasing_method { + AaConfig::Msaa16 => make_mask_lut_16(), + AaConfig::Msaa8 => make_mask_lut(), + _ => unreachable!(), + }; + let mask_buf = recording.upload("mask lut", mask_lut); + + let fine_shader = match cx.params.antialiasing_method { + AaConfig::Msaa16 => cx + .shaders + .fine_msaa16 + .expect("shaders not configured to support AA mode: msaa16"), + AaConfig::Msaa8 => cx + .shaders + .fine_msaa8 + .expect("shaders not configured to support AA mode: msaa8"), + _ => unreachable!(), + }; + recording.dispatch( + fine_shader, + self.fine_workgroup_size, + ( + self.config_buf, + self.segments_buf, + self.ptcl_buf, + self.info_bin_data_buf, + self.out_image, + self.gradient_image, + self.image_atlas, + mask_buf, + ), + ); + // TODO: make mask buf persistent + // could we move mask_buf out of this and make a util that handles + // this resource with the graph and easily creates on AaConfig change? + recording.free_resource(mask_buf.into()); + } + } + + (recording, ()) + } +} diff --git a/src/render_graph/passes/mod.rs b/src/render_graph/passes/mod.rs new file mode 100644 index 00000000..7b435650 --- /dev/null +++ b/src/render_graph/passes/mod.rs @@ -0,0 +1,19 @@ +mod coarse; +pub use coarse::*; + +mod fine; +pub use fine::*; + +use crate::Recording; + +use super::PassContext; + +pub trait RenderPass: Send + Sync { + type Output: Clone + Copy + 'static + where + Self: Sized; + + fn record(self, cx: PassContext<'_>) -> (Recording, Self::Output) + where + Self: Sized; +}