diff --git a/wgpu/examples/boids/main.rs b/wgpu/examples/boids/main.rs index 2c38f1e456..a9304e18d8 100644 --- a/wgpu/examples/boids/main.rs +++ b/wgpu/examples/boids/main.rs @@ -1,8 +1,6 @@ // Flocking boids example with gpu compute update pass // adapted from https://github.com/austinEng/webgpu-samples/blob/master/src/examples/computeBoids.ts -extern crate rand; - #[path = "../framework.rs"] mod framework; @@ -31,7 +29,7 @@ impl framework::Example for Example { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, _queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { // load (and compile) shaders and create shader modules let boids_module = device.create_shader_module(wgpu::include_spirv!("boids.comp.spv")); @@ -221,18 +219,15 @@ impl framework::Example for Example { // returns Example struct and No encoder commands - ( - Example { - particle_bind_groups, - particle_buffers, - vertices_buffer, - compute_pipeline, - render_pipeline, - work_group_count, - frame_num: 0, - }, - None, - ) + Example { + particle_bind_groups, + particle_buffers, + vertices_buffer, + compute_pipeline, + render_pipeline, + work_group_count, + frame_num: 0, + } } /// update is called for any WindowEvent not handled by the framework @@ -256,8 +251,9 @@ impl framework::Example for Example { &mut self, frame: &wgpu::SwapChainTexture, device: &wgpu::Device, - _queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + queue: &wgpu::Queue, + _spawner: &impl futures::task::LocalSpawn, + ) { // create render pass descriptor let render_pass_descriptor = wgpu::RenderPassDescriptor { color_attachments: &[wgpu::RenderPassColorAttachmentDescriptor { @@ -298,7 +294,7 @@ impl framework::Example for Example { self.frame_num += 1; // done - command_encoder.finish() + queue.submit(Some(command_encoder.finish())); } } diff --git a/wgpu/examples/cube/main.rs b/wgpu/examples/cube/main.rs index 26e57a38da..f599c69357 100644 --- a/wgpu/examples/cube/main.rs +++ b/wgpu/examples/cube/main.rs @@ -116,7 +116,7 @@ impl framework::Example for Example { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { use std::mem; // Create the vertex and index buffers @@ -287,15 +287,14 @@ impl framework::Example for Example { }); // Done - let this = Example { + Example { vertex_buf, index_buf, index_count: index_data.len(), bind_group, uniform_buf, pipeline, - }; - (this, None) + } } fn update(&mut self, _event: winit::event::WindowEvent) { @@ -317,8 +316,9 @@ impl framework::Example for Example { &mut self, frame: &wgpu::SwapChainTexture, device: &wgpu::Device, - _queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + queue: &wgpu::Queue, + _spawner: &impl futures::task::LocalSpawn, + ) { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); { @@ -348,7 +348,7 @@ impl framework::Example for Example { rpass.draw_indexed(0..self.index_count as u32, 0, 0..1); } - encoder.finish() + queue.submit(Some(encoder.finish())); } } diff --git a/wgpu/examples/framework.rs b/wgpu/examples/framework.rs index 519d0acd38..e665e4386c 100644 --- a/wgpu/examples/framework.rs +++ b/wgpu/examples/framework.rs @@ -1,8 +1,9 @@ -use std::time; +use futures::task::LocalSpawn; +#[cfg(not(target_arch = "wasm32"))] +use std::time::{Duration, Instant}; use winit::{ event::{self, WindowEvent}, event_loop::{ControlFlow, EventLoop}, - window::Window, }; #[cfg_attr(rustfmt, rustfmt_skip)] @@ -36,7 +37,7 @@ pub trait Example: 'static + Sized { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> (Self, Option); + ) -> Self; fn resize( &mut self, sc_desc: &wgpu::SwapChainDescriptor, @@ -49,10 +50,32 @@ pub trait Example: 'static + Sized { frame: &wgpu::SwapChainTexture, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer; + spawner: &impl LocalSpawn, + ); } -async fn run_async(event_loop: EventLoop<()>, window: Window) { +struct Setup { + window: winit::window::Window, + event_loop: EventLoop<()>, + instance: wgpu::Instance, + size: winit::dpi::PhysicalSize, + surface: wgpu::Surface, + adapter: wgpu::Adapter, + device: wgpu::Device, + queue: wgpu::Queue, +} + +async fn setup(title: &str) -> Setup { + let event_loop = EventLoop::new(); + let mut builder = winit::window::WindowBuilder::new(); + builder = builder.with_title(title); + #[cfg(windows_OFF)] // TODO + { + use winit::platform::windows::WindowBuilderExtWindows; + builder = builder.with_no_redirection_bitmap(true); + } + let window = builder.build(&event_loop).unwrap(); + log::info!("Initializing the surface..."); let instance = wgpu::Instance::new(wgpu::BackendBit::PRIMARY); @@ -90,6 +113,75 @@ async fn run_async(event_loop: EventLoop<()>, window: Window) { .await .unwrap(); + Setup { + window, + event_loop, + instance, + size, + surface, + adapter, + device, + queue, + } +} + +fn start( + Setup { + window, + event_loop, + instance, + size, + surface, + adapter, + device, + queue, + }: Setup, +) { + #[cfg(not(target_arch = "wasm32"))] + let (mut pool, spawner) = { + env_logger::init(); + + #[cfg(feature = "subscriber")] + { + let chrome_tracing_dir = std::env::var("WGPU_CHROME_TRACING"); + wgpu::util::initialize_default_subscriber(chrome_tracing_dir.ok()); + }; + + let local_pool = futures::executor::LocalPool::new(); + let spawner = local_pool.spawner(); + (local_pool, spawner) + }; + + #[cfg(target_arch = "wasm32")] + let spawner = { + use futures::{future::LocalFutureObj, task::SpawnError}; + use winit::platform::web::WindowExtWebSys; + + struct WebSpawner {} + impl LocalSpawn for WebSpawner { + fn spawn_local_obj( + &self, + future: LocalFutureObj<'static, ()>, + ) -> Result<(), SpawnError> { + Ok(wasm_bindgen_futures::spawn_local(future)) + } + } + + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init().expect("could not initialize logger"); + // On wasm, append the canvas to the document body + web_sys::window() + .and_then(|win| win.document()) + .and_then(|doc| doc.body()) + .and_then(|body| { + body.append_child(&web_sys::Element::from(window.canvas())) + .ok() + }) + .expect("couldn't append canvas to document body"); + + WebSpawner {} + }; + let mut sc_desc = wgpu::SwapChainDescriptor { usage: wgpu::TextureUsage::OUTPUT_ATTACHMENT, // TODO: Allow srgb unconditionally @@ -105,13 +197,10 @@ async fn run_async(event_loop: EventLoop<()>, window: Window) { let mut swap_chain = device.create_swap_chain(&surface, &sc_desc); log::info!("Initializing the example..."); - let (mut example, init_command_buf) = E::init(&sc_desc, &device, &queue); - if init_command_buf.is_some() { - queue.submit(init_command_buf); - } + let mut example = E::init(&sc_desc, &device, &queue); #[cfg(not(target_arch = "wasm32"))] - let mut last_update_inst = time::Instant::now(); + let mut last_update_inst = Instant::now(); log::info!("Entering render loop..."); event_loop.run(move |event, _, control_flow| { @@ -121,7 +210,7 @@ async fn run_async(event_loop: EventLoop<()>, window: Window) { } else { #[cfg(not(target_arch = "wasm32"))] { - ControlFlow::WaitUntil(time::Instant::now() + time::Duration::from_millis(10)) + ControlFlow::WaitUntil(Instant::now() + Duration::from_millis(10)) } #[cfg(target_arch = "wasm32")] { @@ -132,10 +221,12 @@ async fn run_async(event_loop: EventLoop<()>, window: Window) { event::Event::MainEventsCleared => { #[cfg(not(target_arch = "wasm32"))] { - if last_update_inst.elapsed() > time::Duration::from_millis(20) { + if last_update_inst.elapsed() > Duration::from_millis(20) { window.request_redraw(); - last_update_inst = time::Instant::now(); + last_update_inst = Instant::now(); } + + pool.run_until_stalled(); } #[cfg(target_arch = "wasm32")] @@ -178,53 +269,26 @@ async fn run_async(event_loop: EventLoop<()>, window: Window) { } }; - let command_buf = example.render(&frame.output, &device, &queue); - queue.submit(Some(command_buf)); + example.render(&frame.output, &device, &queue, &spawner); } _ => {} } }); } +#[cfg(not(target_arch = "wasm32"))] pub fn run(title: &str) { - let event_loop = EventLoop::new(); - let mut builder = winit::window::WindowBuilder::new(); - builder = builder.with_title(title); - #[cfg(windows_OFF)] // TODO - { - use winit::platform::windows::WindowBuilderExtWindows; - builder = builder.with_no_redirection_bitmap(true); - } - let window = builder.build(&event_loop).unwrap(); + let setup = futures::executor::block_on(setup::(title)); + start::(setup); +} - #[cfg(not(target_arch = "wasm32"))] - { - env_logger::init(); - - #[cfg(feature = "subscriber")] - { - let chrome_tracing_dir = std::env::var("WGPU_CHROME_TRACING"); - wgpu::util::initialize_default_subscriber(chrome_tracing_dir.ok()); - }; - - futures::executor::block_on(run_async::(event_loop, window)); - } - #[cfg(target_arch = "wasm32")] - { - std::panic::set_hook(Box::new(console_error_panic_hook::hook)); - console_log::init().expect("could not initialize logger"); - use winit::platform::web::WindowExtWebSys; - // On wasm, append the canvas to the document body - web_sys::window() - .and_then(|win| win.document()) - .and_then(|doc| doc.body()) - .and_then(|body| { - body.append_child(&web_sys::Element::from(window.canvas())) - .ok() - }) - .expect("couldn't append canvas to document body"); - wasm_bindgen_futures::spawn_local(run_async::(event_loop, window)); - } +#[cfg(target_arch = "wasm32")] +pub fn run(title: &str) { + let title = title.to_owned(); + wasm_bindgen_futures::spawn_local(async move { + let setup = setup::(&title).await; + start::(setup); + }); } // This allows treating the framework as a standalone example, diff --git a/wgpu/examples/mipmap/main.rs b/wgpu/examples/mipmap/main.rs index da2c634907..ebf75cfa73 100644 --- a/wgpu/examples/mipmap/main.rs +++ b/wgpu/examples/mipmap/main.rs @@ -204,8 +204,8 @@ impl framework::Example for Example { fn init( sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, - _queue: &wgpu::Queue, - ) -> (Self, Option) { + queue: &wgpu::Queue, + ) -> Self { use std::mem; let mut init_encoder = @@ -375,14 +375,14 @@ impl framework::Example for Example { // Done Self::generate_mipmaps(&mut init_encoder, &device, &texture, mip_level_count); + queue.submit(Some(init_encoder.finish())); - let this = Example { + Example { vertex_buf, bind_group, uniform_buf, draw_pipeline, - }; - (this, Some(init_encoder.finish())) + } } fn update(&mut self, _event: winit::event::WindowEvent) { @@ -404,8 +404,9 @@ impl framework::Example for Example { &mut self, frame: &wgpu::SwapChainTexture, device: &wgpu::Device, - _queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + queue: &wgpu::Queue, + _spawner: &impl futures::task::LocalSpawn, + ) { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); { @@ -432,7 +433,7 @@ impl framework::Example for Example { rpass.draw(0..4, 0..1); } - encoder.finish() + queue.submit(Some(encoder.finish())); } } diff --git a/wgpu/examples/msaa-line/main.rs b/wgpu/examples/msaa-line/main.rs index 482e74a9fe..f42c9f92f0 100644 --- a/wgpu/examples/msaa-line/main.rs +++ b/wgpu/examples/msaa-line/main.rs @@ -132,7 +132,7 @@ impl framework::Example for Example { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, _queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { log::info!("Press left/right arrow keys to change sample_count."); let sample_count = 4; @@ -179,7 +179,7 @@ impl framework::Example for Example { vertex_count, ); - let this = Example { + Example { bundle, vs_module, fs_module, @@ -190,8 +190,7 @@ impl framework::Example for Example { sample_count, rebuild_bundle: false, sc_desc: sc_desc.clone(), - }; - (this, None) + } } fn update(&mut self, event: winit::event::WindowEvent) { @@ -234,8 +233,9 @@ impl framework::Example for Example { &mut self, frame: &wgpu::SwapChainTexture, device: &wgpu::Device, - _queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + queue: &wgpu::Queue, + _spawner: &impl futures::task::LocalSpawn, + ) { if self.rebuild_bundle { self.bundle = Example::create_bundle( device, @@ -281,7 +281,7 @@ impl framework::Example for Example { .execute_bundles(iter::once(&self.bundle)); } - encoder.finish() + queue.submit(iter::once(encoder.finish())); } } diff --git a/wgpu/examples/shadow/main.rs b/wgpu/examples/shadow/main.rs index c243e870aa..8da8837b49 100644 --- a/wgpu/examples/shadow/main.rs +++ b/wgpu/examples/shadow/main.rs @@ -1,4 +1,4 @@ -use std::{mem, ops::Range, rc::Rc}; +use std::{iter, mem, ops::Range, rc::Rc}; #[path = "../framework.rs"] mod framework; @@ -207,7 +207,7 @@ impl framework::Example for Example { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, _queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { // Create the vertex and index buffers let vertex_size = mem::size_of::(); let (cube_vertex_data, cube_index_data) = create_cube(); @@ -656,7 +656,7 @@ impl framework::Example for Example { label: None, }); - let this = Example { + Example { entities, lights, lights_are_dirty: true, @@ -664,8 +664,7 @@ impl framework::Example for Example { forward_pass, forward_depth: depth_texture.create_default_view(), light_uniform_buf, - }; - (this, None) + } } fn update(&mut self, _event: winit::event::WindowEvent) { @@ -708,7 +707,8 @@ impl framework::Example for Example { frame: &wgpu::SwapChainTexture, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + _spawner: &impl futures::task::LocalSpawn, + ) { // update uniforms for entity in self.entities.iter_mut() { if entity.rotation_speed != 0.0 { @@ -810,7 +810,7 @@ impl framework::Example for Example { } } - encoder.finish() + queue.submit(iter::once(encoder.finish())); } } diff --git a/wgpu/examples/skybox/main.rs b/wgpu/examples/skybox/main.rs index 05cb66bfba..4a9b7836c4 100644 --- a/wgpu/examples/skybox/main.rs +++ b/wgpu/examples/skybox/main.rs @@ -1,6 +1,8 @@ #[path = "../framework.rs"] mod framework; +use futures::task::{LocalSpawn, LocalSpawnExt}; + const SKYBOX_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm; type Uniform = cgmath::Matrix4; @@ -19,6 +21,7 @@ pub struct Skybox { bind_group: wgpu::BindGroup, uniform_buf: wgpu::Buffer, uniforms: Uniforms, + staging_belt: wgpu::util::StagingBelt, } impl Skybox { @@ -39,7 +42,7 @@ impl framework::Example for Skybox { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { bindings: &[ wgpu::BindGroupLayoutEntry::new( @@ -227,16 +230,15 @@ impl framework::Example for Skybox { ], label: None, }); - ( - Self { - pipeline, - bind_group, - uniform_buf, - aspect, - uniforms, - }, - None, - ) + + Skybox { + pipeline, + bind_group, + uniform_buf, + aspect, + uniforms, + staging_belt: wgpu::util::StagingBelt::new(0x100, device), + } } fn update(&mut self, _event: winit::event::WindowEvent) { @@ -261,15 +263,21 @@ impl framework::Example for Skybox { frame: &wgpu::SwapChainTexture, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + spawner: &impl LocalSpawn, + ) { // update rotation let rotation = cgmath::Matrix4::::from_angle_x(cgmath::Deg(0.25)); self.uniforms[1] = self.uniforms[1] * rotation; - queue.write_buffer( - &self.uniform_buf, - 0, - bytemuck::cast_slice(&raw_uniforms(&self.uniforms)), - ); + let raw_uniforms = raw_uniforms(&self.uniforms); + self.staging_belt + .write_buffer( + &self.uniform_buf, + 0, + wgpu::BufferSize::new((raw_uniforms.len() * 4) as wgpu::BufferAddress).unwrap(), + device, + ) + .copy_from_slice(bytemuck::cast_slice(&raw_uniforms)); + let transfer_comb = self.staging_belt.flush(device); let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); @@ -295,7 +303,11 @@ impl framework::Example for Skybox { rpass.set_bind_group(0, &self.bind_group, &[]); rpass.draw(0..3 as u32, 0..1); } - encoder.finish() + + queue.submit(vec![transfer_comb, encoder.finish()]); + + let belt_future = self.staging_belt.recall(); + spawner.spawn_local(belt_future).unwrap(); } } diff --git a/wgpu/examples/texture-arrays/main.rs b/wgpu/examples/texture-arrays/main.rs index ce9b1b716c..931bdba12e 100644 --- a/wgpu/examples/texture-arrays/main.rs +++ b/wgpu/examples/texture-arrays/main.rs @@ -95,7 +95,7 @@ impl framework::Example for Example { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { let mut uniform_workaround = false; let vs_module = device.create_shader_module(wgpu::include_spirv!("shader.vert.spv")); let fs_bytes: Vec = match device.capabilities() { @@ -335,16 +335,13 @@ impl framework::Example for Example { alpha_to_coverage_enabled: false, }); - ( - Self { - vertex_buffer, - index_buffer, - bind_group, - pipeline, - uniform_workaround_data, - }, - None, - ) + Self { + vertex_buffer, + index_buffer, + bind_group, + pipeline, + uniform_workaround_data, + } } fn resize( &mut self, @@ -361,8 +358,9 @@ impl framework::Example for Example { &mut self, frame: &wgpu::SwapChainTexture, device: &wgpu::Device, - _queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + queue: &wgpu::Queue, + _spawner: &impl futures::task::LocalSpawn, + ) { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("primary"), }); @@ -393,7 +391,7 @@ impl framework::Example for Example { drop(rpass); - encoder.finish() + queue.submit(Some(encoder.finish())); } } diff --git a/wgpu/examples/water/main.rs b/wgpu/examples/water/main.rs index b280e248b0..050fe0ba11 100644 --- a/wgpu/examples/water/main.rs +++ b/wgpu/examples/water/main.rs @@ -4,7 +4,7 @@ mod framework; mod point_gen; use cgmath::Point3; -use std::mem; +use std::{iter, mem}; /// /// Radius of the terrain. @@ -269,7 +269,7 @@ impl framework::Example for Example { sc_desc: &wgpu::SwapChainDescriptor, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> (Self, Option) { + ) -> Self { // Size of one water vertex let water_vertex_size = mem::size_of::(); @@ -609,7 +609,7 @@ impl framework::Example for Example { }); // Done - let this = Example { + Example { water_vertex_buf, water_vertex_count: water_vertices.len(), water_bind_group_layout, @@ -632,8 +632,7 @@ impl framework::Example for Example { current_frame: 0, active: Some(0), - }; - (this, None) + } } fn update(&mut self, _event: winit::event::WindowEvent) { @@ -680,7 +679,8 @@ impl framework::Example for Example { frame: &wgpu::SwapChainTexture, device: &wgpu::Device, queue: &wgpu::Queue, - ) -> wgpu::CommandBuffer { + _spawner: &impl futures::task::LocalSpawn, + ) { // Increment frame count regardless of if we draw. self.current_frame += 1; let back_color = wgpu::Color { @@ -698,21 +698,21 @@ impl framework::Example for Example { bytemuck::cast_slice(&[water_sin, water_cos]), ); + // Only render valid frames. See resize method. + if let Some(active) = self.active { + if active >= self.current_frame { + return; + } + } else { + return; + } + // The encoder provides a way to turn our instructions here, into // a command buffer the GPU can understand. let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("Main Command Encoder"), }); - // Only render valid frames. See resize method. - if let Some(active) = self.active { - if active >= self.current_frame { - return encoder.finish(); - } - } else { - return encoder.finish(); - } - // First pass: render the reflection. { let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { @@ -791,7 +791,7 @@ impl framework::Example for Example { rpass.draw(0..self.water_vertex_count as u32, 0..1); } - encoder.finish() + queue.submit(iter::once(encoder.finish())); } } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 87826c449c..2defa601db 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1433,7 +1433,7 @@ impl Buffer { } } -impl BufferSlice<'_> { +impl<'a> BufferSlice<'a> { //TODO: fn slice(&self) -> Self /// Map the buffer. Buffer is ready to map once the future is resolved. @@ -1473,7 +1473,7 @@ impl BufferSlice<'_> { /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic. - pub fn get_mapped_range(&self) -> BufferView { + pub fn get_mapped_range(&self) -> BufferView<'a> { let end = self.buffer.map_context.lock().add(self.offset, self.size); let data = Context::buffer_get_mapped_range( &*self.buffer.context, @@ -1485,7 +1485,7 @@ impl BufferSlice<'_> { /// Synchronously and immediately map a buffer for writing. If the buffer is not immediately mappable /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic. - pub fn get_mapped_range_mut(&self) -> BufferViewMut { + pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> { let end = self.buffer.map_context.lock().add(self.offset, self.size); let data = Context::buffer_get_mapped_range_mut( &*self.buffer.context, diff --git a/wgpu/src/util/belt.rs b/wgpu/src/util/belt.rs new file mode 100644 index 0000000000..6d780785cd --- /dev/null +++ b/wgpu/src/util/belt.rs @@ -0,0 +1,148 @@ +use crate::{ + Buffer, BufferAddress, BufferDescriptor, BufferSize, BufferUsage, BufferViewMut, CommandBuffer, + CommandEncoder, CommandEncoderDescriptor, Device, MapMode, +}; +use futures::{future::join_all, FutureExt}; +use std::{future::Future, mem, sync::mpsc}; + +struct Chunk { + buffer: Buffer, + size: BufferAddress, + offset: BufferAddress, +} + +/// Staging belt is a machine that uploads data. +/// +/// Internally it uses a ring-buffer of staging buffers that are sub-allocated. +/// It has an advantage over `Queue.write_buffer` in a way that it returns a mutable slice, +/// which you can fill to avoid an extra data copy. +pub struct StagingBelt { + chunk_size: BufferAddress, + encoder: CommandEncoder, + /// Chunks that we are actively using for pending transfers at this moment. + active_chunks: Vec, + /// Chunks that have scheduled transfers already. + closed_chunks: Vec, + /// Chunks that are back from the GPU and ready to be used. + free_chunks: Vec, + sender: mpsc::Sender, + receiver: mpsc::Receiver, +} + +impl StagingBelt { + /// Create a new staging belt. + /// + /// The `chunk_size` is the unit of internal buffer allocation. + /// It's better when it's big, but ideally still 1-4 times less than + /// the total amount of data uploaded per submission. + pub fn new(chunk_size: BufferAddress, device: &Device) -> Self { + let (sender, receiver) = mpsc::channel(); + StagingBelt { + chunk_size, + encoder: device.create_command_encoder(&CommandEncoderDescriptor::default()), + active_chunks: Vec::new(), + closed_chunks: Vec::new(), + free_chunks: Vec::new(), + sender, + receiver, + } + } + + /// Allocate the staging belt slice of `size` to be uploaded into the `target` buffer + /// at the specified offset. + /// + /// The upload will only really be scheduled at the next `StagingBelt::flush` call. + pub fn write_buffer( + &mut self, + target: &Buffer, + offset: BufferAddress, + size: BufferSize, + device: &Device, + ) -> BufferViewMut { + let mut chunk = if let Some(index) = self + .active_chunks + .iter() + .position(|chunk| chunk.offset + size.get() <= chunk.size) + { + self.active_chunks.swap_remove(index) + } else if let Some(index) = self + .free_chunks + .iter() + .position(|chunk| size.get() <= chunk.size) + { + self.free_chunks.swap_remove(index) + } else { + let size = self.chunk_size.max(size.get()); + #[cfg(not(target_arch = "wasm32"))] + wgc::span!(_guard, INFO, "Creating chunk of size {}", size); + Chunk { + buffer: device.create_buffer(&BufferDescriptor { + label: Some("staging"), + size, + usage: BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC, + mapped_at_creation: true, + }), + size, + offset: 0, + } + }; + + self.encoder + .copy_buffer_to_buffer(&chunk.buffer, chunk.offset, target, offset, size.get()); + let old_offset = chunk.offset; + chunk.offset += size.get(); + let remainder = chunk.offset % crate::COPY_BUFFER_ALIGNMENT; + if remainder != 0 { + chunk.offset += crate::COPY_BUFFER_ALIGNMENT - remainder; + } + + self.active_chunks.push(chunk); + self.active_chunks + .last() + .unwrap() + .buffer + .slice(old_offset..old_offset + size.get()) + .get_mapped_range_mut() + } + + /// Produce a command buffer with all the accumulated transfers. + /// + /// At this point, all the partially used staging buffers are closed until + /// the GPU is done copying the data from them. + pub fn flush(&mut self, device: &Device) -> CommandBuffer { + #[cfg(not(target_arch = "wasm32"))] + wgc::span!(_guard, DEBUG, "Flushing chunks"); + + for chunk in self.active_chunks.drain(..) { + chunk.buffer.unmap(); + self.closed_chunks.push(chunk); + } + + mem::replace( + &mut self.encoder, + device.create_command_encoder(&CommandEncoderDescriptor::default()), + ) + .finish() + } + + /// Recall all of the closed buffers back for re-usal. + /// + /// This has to be called after the command buffer produced by `flush` is submitted! + pub fn recall(&mut self) -> impl Future + Send { + while let Ok(mut chunk) = self.receiver.try_recv() { + chunk.offset = 0; + self.free_chunks.push(chunk); + } + + let sender_template = &self.sender; + join_all(self.closed_chunks.drain(..).map(|chunk| { + let sender = sender_template.clone(); + chunk + .buffer + .slice(..) + .map_async(MapMode::Write) + .inspect(move |_| sender.send(chunk).unwrap()) + })) + .map(|_| ()) + } +} diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs index cc201a0413..60dcb0515b 100644 --- a/wgpu/src/util/mod.rs +++ b/wgpu/src/util/mod.rs @@ -1,6 +1,10 @@ +mod belt; + #[cfg(all(not(target_arch = "wasm32"), feature = "subscriber"))] pub use wgc::logging::subscriber::{initialize_default_subscriber, ChromeTracingLayer}; +pub use belt::StagingBelt; + /// Wrapper aligning contents to at least 4. #[repr(align(4))] pub struct WordAligned(pub Bytes);