From 8431da80eddefbf860410e221e74d8a370172a1d Mon Sep 17 00:00:00 2001 From: Tristam MacDonald Date: Tue, 12 Feb 2019 09:36:29 -0800 Subject: [PATCH] Implement map read/write async --- Cargo.lock | 2 + examples/hello_compute_rust/main.rs | 12 ++- gfx-examples/src/cube.rs | 53 +++++++--- wgpu-native/src/device.rs | 151 +++++++++++++++++++++++++--- wgpu-native/src/resource.rs | 23 ++++- wgpu-rs/src/lib.rs | 56 ++++++++++- 6 files changed, 268 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61f863b946..dc60851606 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,3 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. [[package]] name = "aho-corasick" version = "0.6.9" diff --git a/examples/hello_compute_rust/main.rs b/examples/hello_compute_rust/main.rs index 310426b24d..5b9400e23f 100644 --- a/examples/hello_compute_rust/main.rs +++ b/examples/hello_compute_rust/main.rs @@ -95,7 +95,17 @@ fn main() { } encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffer, 0, size); - // TODO: read the results back out of the staging buffer + + staging_buffer.map_read_async(0, size, |result: wgpu::BufferMapAsyncResult<&[u8]>| { + if let wgpu::BufferMapAsyncResult::Success(data) = result { + let results = unsafe { ::std::slice::from_raw_parts(data.as_ptr() as *const u32, data.len() / std::mem::size_of::()) }; + println!("Times: {:?}", results); + } + + }); device.get_queue().submit(&[encoder.finish()]); + + // TODO: why does calling unmap() inside the callback prevent the program from exiting? + staging_buffer.unmap(); } diff --git a/gfx-examples/src/cube.rs b/gfx-examples/src/cube.rs index e1ef403245..ab6529f22c 100644 --- a/gfx-examples/src/cube.rs +++ b/gfx-examples/src/cube.rs @@ -115,16 +115,30 @@ impl framework::Example for Example { // Create the vertex and index buffers let vertex_size = mem::size_of::(); let (vertex_data, index_data) = create_vertices(); + let vertex_buffer_length = vertex_data.len() * vertex_size; + let index_buffer_length = index_data.len() * mem::size_of::(); let vertex_buf = device.create_buffer(&wgpu::BufferDescriptor { - size: (vertex_data.len() * vertex_size) as u32, - usage: wgpu::BufferUsageFlags::VERTEX | wgpu::BufferUsageFlags::TRANSFER_DST, + size: vertex_buffer_length as u32, + usage: wgpu::BufferUsageFlags::VERTEX | wgpu::BufferUsageFlags::TRANSFER_DST | wgpu::BufferUsageFlags::MAP_WRITE, }); - vertex_buf.set_sub_data(0, framework::cast_slice(&vertex_data)); + + //vertex_buf.set_sub_data(0, framework::cast_slice(&vertex_data)); + vertex_buf.map_write_async(0, vertex_buffer_length as u32, |result: wgpu::BufferMapAsyncResult<&mut [u8]>| { + if let wgpu::BufferMapAsyncResult::Success(data) = result { + unsafe { std::ptr::copy_nonoverlapping(vertex_data.as_ptr() as *const u8, data.as_mut_ptr(), vertex_buffer_length) }; + } + }); + let index_buf = device.create_buffer(&wgpu::BufferDescriptor { - size: (index_data.len() * 2) as u32, - usage: wgpu::BufferUsageFlags::INDEX | wgpu::BufferUsageFlags::TRANSFER_DST, + size: index_buffer_length as u32, + usage: wgpu::BufferUsageFlags::INDEX | wgpu::BufferUsageFlags::TRANSFER_DST | wgpu::BufferUsageFlags::MAP_WRITE, + }); + // index_buf.set_sub_data(0, framework::cast_slice(&index_data)); + index_buf.map_write_async(0, index_buffer_length as u32, |result: wgpu::BufferMapAsyncResult<&mut [u8]>| { + if let wgpu::BufferMapAsyncResult::Success(data) = result { + unsafe { std::ptr::copy_nonoverlapping(index_data.as_ptr() as *const u8, data.as_mut_ptr(), index_buffer_length) }; + } }); - index_buf.set_sub_data(0, framework::cast_slice(&index_data)); // Create pipeline layout let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { @@ -163,14 +177,19 @@ impl framework::Example for Example { array_size: 1, dimension: wgpu::TextureDimension::D2, format: wgpu::TextureFormat::R8g8b8a8Unorm, - usage: wgpu::TextureUsageFlags::SAMPLED | wgpu::TextureUsageFlags::TRANSFER_DST + usage: wgpu::TextureUsageFlags::SAMPLED | wgpu::TextureUsageFlags::TRANSFER_DST, }); let texture_view = texture.create_default_view(); let temp_buf = device.create_buffer(&wgpu::BufferDescriptor { size: texels.len() as u32, - usage: wgpu::BufferUsageFlags::TRANSFER_SRC | wgpu::BufferUsageFlags::TRANSFER_DST + usage: wgpu::BufferUsageFlags::TRANSFER_SRC | wgpu::BufferUsageFlags::TRANSFER_DST | wgpu::BufferUsageFlags::MAP_WRITE, + }); + // temp_buf.set_sub_data(0, &texels); + temp_buf.map_write_async(0, texels.len() as u32, |result: wgpu::BufferMapAsyncResult<&mut [u8]>| { + if let wgpu::BufferMapAsyncResult::Success(data) = result { + unsafe { std::ptr::copy_nonoverlapping(texels.as_ptr() as *const u8, data.as_mut_ptr(), texels.len()) }; + } }); - temp_buf.set_sub_data(0, &texels); init_encoder.copy_buffer_to_texture( wgpu::BufferCopyView { buffer: &temp_buf, @@ -207,11 +226,16 @@ impl framework::Example for Example { }); let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor { size: 64, - usage: wgpu::BufferUsageFlags::UNIFORM | wgpu::BufferUsageFlags::TRANSFER_DST, + usage: wgpu::BufferUsageFlags::UNIFORM | wgpu::BufferUsageFlags::TRANSFER_DST | wgpu::BufferUsageFlags::MAP_WRITE, }); let mx_total = Self::generate_matrix(sc_desc.width as f32 / sc_desc.height as f32); let mx_ref: &[f32; 16] = mx_total.as_ref(); - uniform_buf.set_sub_data(0, framework::cast_slice(&mx_ref[..])); + // uniform_buf.set_sub_data(0, framework::cast_slice(&mx_ref[..])); + uniform_buf.map_write_async(0, 64, |result: wgpu::BufferMapAsyncResult<&mut [u8]>| { + if let wgpu::BufferMapAsyncResult::Success(data) = result { + unsafe { std::ptr::copy_nonoverlapping(mx_ref.as_ptr() as *const u8, data.as_mut_ptr(), 64) }; + } + }); // Create bind group let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { @@ -310,7 +334,12 @@ impl framework::Example for Example { fn resize(&mut self, sc_desc: &wgpu::SwapChainDescriptor, _device: &mut wgpu::Device) { let mx_total = Self::generate_matrix(sc_desc.width as f32 / sc_desc.height as f32); let mx_ref: &[f32; 16] = mx_total.as_ref(); - self.uniform_buf.set_sub_data(0, framework::cast_slice(&mx_ref[..])); + // self.uniform_buf.set_sub_data(0, framework::cast_slice(&mx_ref[..])); + self.uniform_buf.map_write_async(0, 64, |result: wgpu::BufferMapAsyncResult<&mut [u8]>| { + if let wgpu::BufferMapAsyncResult::Success(data) = result { + unsafe { std::ptr::copy_nonoverlapping(mx_ref.as_ptr() as *const u8, data.as_mut_ptr(), 64) }; + } + }); } fn render(&mut self, frame: &wgpu::SwapChainOutput, device: &mut wgpu::Device) { diff --git a/wgpu-native/src/device.rs b/wgpu-native/src/device.rs index a7da89d545..61542f733b 100644 --- a/wgpu-native/src/device.rs +++ b/wgpu-native/src/device.rs @@ -3,6 +3,7 @@ use crate::hub::HUB; use crate::track::{TrackerSet, TrackPermit}; use crate::{ LifeGuard, RefCount, Stored, SubmissionIndex, WeaklyStored, + BufferMapAsyncStatus, BufferMapOperation, }; use crate::{ BufferId, CommandBufferId, AdapterId, DeviceId, QueueId, @@ -11,7 +12,7 @@ use crate::{ #[cfg(feature = "local")] use crate::{ BindGroupId, BindGroupLayoutId, PipelineLayoutId, SamplerId, SwapChainId, - ShaderModuleId, CommandEncoderId, RenderPipelineId, ComputePipelineId, + ShaderModuleId, CommandEncoderId, RenderPipelineId, ComputePipelineId, }; use back; @@ -83,9 +84,12 @@ struct ActiveSubmission { index: SubmissionIndex, fence: B::Fence, resources: Vec>, + mapped: Vec, } struct DestroyedResources { + /// Resources that the user has requested be mapped, but are still in use. + mapped: Vec>, /// Resources that are destroyed by the user but still referenced by /// other objects or command buffers. referenced: Vec<(ResourceId, RefCount)>, @@ -95,17 +99,22 @@ struct DestroyedResources { /// Resources that are neither referenced or used, just pending /// actual deletion. free: Vec>, + ready_to_map: Vec, } unsafe impl Send for DestroyedResources {} unsafe impl Sync for DestroyedResources {} impl DestroyedResources { - fn add(&mut self, resource_id: ResourceId, ref_count: RefCount) { + fn destroy(&mut self, resource_id: ResourceId, ref_count: RefCount) { debug_assert!(!self.referenced.iter().any(|r| r.0 == resource_id)); self.referenced.push((resource_id, ref_count)); } + fn map(&mut self, buffer: BufferId, ref_count: RefCount) { + self.mapped.push(Stored{value: buffer, ref_count}); + } + /// Returns the last submission index that is done. fn cleanup(&mut self, raw: &B::Device) -> SubmissionIndex { let mut last_done = 0; @@ -185,10 +194,64 @@ impl DestroyedResources { } } } + + let buffer_guard = HUB.buffers.read(); + + for i in (0..self.mapped.len()).rev() { + // one in resource itself, one here in this list, one the owner holds, and one more somewhere? + let num_refs = self.mapped[i].ref_count.load(); + trace!("{} references remain", num_refs); + if num_refs <= 4 { + // assert_eq!(num_refs, 4); + let resource_id = self.mapped.swap_remove(i).value; + let buf = buffer_guard.get(resource_id); + let submit_index = buf.life_guard.submission_index.load(Ordering::Acquire); + match self + .active + .iter_mut() + .find(|a| a.index == submit_index) + { + Some(a) => a.mapped.push(resource_id), + None => self.ready_to_map.push(resource_id), + } + } + } + } + + fn handle_mapping(&mut self, raw: &::Device) { + let mut buffer_guard = HUB.buffers.write(); + + for buffer_id in self.ready_to_map.drain(..) { + let mut buffer = buffer_guard.get_mut(buffer_id); + let mut operation = None; + std::mem::swap(&mut operation, &mut buffer.pending_map_operation); + match operation { + Some(BufferMapOperation::Read(range, callback, userdata)) => { + if let Ok(ptr) = unsafe { raw.map_memory(&buffer.memory, range.clone()) } { + if !buffer.memory_properties.contains(hal::memory::Properties::COHERENT) { + unsafe { raw.invalidate_mapped_memory_ranges(iter::once((&buffer.memory, range.clone()))).unwrap() }; // TODO + } + callback(BufferMapAsyncStatus::Success, ptr, userdata); + } else { + callback(BufferMapAsyncStatus::Error, std::ptr::null(), userdata); + } + }, + Some(BufferMapOperation::Write(range, callback, userdata)) => { + if let Ok(ptr) = unsafe { raw.map_memory(&buffer.memory, range.clone()) } { + if !buffer.memory_properties.contains(hal::memory::Properties::COHERENT) { + buffer.mapped_write_ranges.push(range); + } + callback(BufferMapAsyncStatus::Success, ptr, userdata); + } else { + callback(BufferMapAsyncStatus::Error, std::ptr::null_mut(), userdata); + } + }, + _ => unreachable!(), + }; + } } } - pub struct Device { pub(crate) raw: B::Device, adapter_id: WeaklyStored, @@ -276,9 +339,11 @@ impl Device { framebuffers: Mutex::new(FastHashMap::default()), desc_pool, destroyed: Mutex::new(DestroyedResources { + mapped: Vec::new(), referenced: Vec::new(), active: Vec::new(), free: Vec::new(), + ready_to_map: Vec::new(), }), } } @@ -296,7 +361,7 @@ pub fn device_create_buffer( ) -> resource::Buffer { let device_guard = HUB.devices.read(); let device = &device_guard.get(device_id); - let (usage, _) = conv::map_buffer_usage(desc.usage); + let (usage, memory_properties) = conv::map_buffer_usage(desc.usage); let mut buffer = unsafe { device.raw.create_buffer(desc.size as u64, usage).unwrap() @@ -310,11 +375,10 @@ pub fn device_create_buffer( .iter() .enumerate() .position(|(id, memory_type)| { - // TODO requirements.type_mask & (1 << id) != 0 && memory_type .properties - .contains(hal::memory::Properties::DEVICE_LOCAL) + .contains(memory_properties) }) .unwrap() .into(); @@ -336,6 +400,10 @@ pub fn device_create_buffer( value: device_id, ref_count: device.life_guard.ref_count.clone(), }, + memory_properties, + memory, + mapped_write_ranges: Vec::new(), + pending_map_operation: None, life_guard: LifeGuard::new(), } } @@ -377,13 +445,12 @@ pub extern "C" fn wgpu_buffer_destroy(buffer_id: BufferId) { .get(buffer.device_id.value) .destroyed .lock() - .add( + .destroy( ResourceId::Buffer(buffer_id), buffer.life_guard.ref_count.clone(), ); } - pub fn device_create_texture( device_id: DeviceId, desc: &resource::TextureDescriptor, @@ -618,7 +685,7 @@ pub extern "C" fn wgpu_texture_destroy(texture_id: TextureId) { .get(texture.device_id.value) .destroyed .lock() - .add( + .destroy( ResourceId::Texture(texture_id), texture.life_guard.ref_count.clone(), ); @@ -637,7 +704,7 @@ pub extern "C" fn wgpu_texture_view_destroy(texture_view_id: TextureViewId) { .get(device_id) .destroyed .lock() - .add( + .destroy( ResourceId::TextureView(texture_view_id), view.life_guard.ref_count.clone(), ); @@ -1047,11 +1114,13 @@ pub extern "C" fn wgpu_queue_submit( let mut destroyed = device.destroyed.lock(); destroyed.triage_referenced(&mut *trackers); let last_done = destroyed.cleanup(&device.raw); + destroyed.handle_mapping(&device.raw); destroyed.active.push(ActiveSubmission { index: old_submit_index + 1, fence, resources: Vec::new(), + mapped: Vec::new(), }); last_done @@ -1386,8 +1455,8 @@ pub fn device_create_swap_chain( let mut destroyed = device.destroyed.lock(); assert_eq!(old.device_id.value, device_id); for frame in old.frames { - destroyed.add(ResourceId::Texture(frame.texture_id.value), frame.texture_id.ref_count); - destroyed.add(ResourceId::TextureView(frame.view_id.value), frame.view_id.ref_count); + destroyed.destroy(ResourceId::Texture(frame.texture_id.value), frame.texture_id.ref_count); + destroyed.destroy(ResourceId::TextureView(frame.view_id.value), frame.view_id.ref_count); } unsafe { old.command_pool.reset() @@ -1610,4 +1679,62 @@ pub extern "C" fn wgpu_buffer_set_sub_data( #[no_mangle] pub extern "C" fn wgpu_device_destroy(device_id: BufferId) { HUB.devices.unregister(device_id); +} + +pub type BufferMapReadCallback = extern "C" fn(status: BufferMapAsyncStatus, data: *const u8, userdata: *mut u8); +pub type BufferMapWriteCallback = extern "C" fn(status: BufferMapAsyncStatus, data: *mut u8, userdata: *mut u8); + +#[no_mangle] +pub extern "C" fn wgpu_buffer_map_read_async( + buffer_id: BufferId, + start: u32, size: u32, callback: BufferMapReadCallback, userdata: *mut u8, +) { + let mut buffer_guard = HUB.buffers.write(); + let buffer = buffer_guard.get_mut(buffer_id); + let device_guard = HUB.devices.read(); + let device = device_guard.get(buffer.device_id.value); + + let range = start as u64..(start + size) as u64; + buffer.pending_map_operation = Some(BufferMapOperation::Read(range, callback, userdata)); + + device + .destroyed + .lock() + .map(buffer_id, buffer.life_guard.ref_count.clone()); +} + +#[no_mangle] +pub extern "C" fn wgpu_buffer_map_write_async( + buffer_id: BufferId, + start: u32, size: u32, callback: BufferMapWriteCallback, userdata: *mut u8, +) { + let mut buffer_guard = HUB.buffers.write(); + let buffer = buffer_guard.get_mut(buffer_id); + let device_guard = HUB.devices.read(); + let device = device_guard.get(buffer.device_id.value); + + let range = start as u64..(start + size) as u64; + buffer.pending_map_operation = Some(BufferMapOperation::Write(range, callback, userdata)); + + device + .destroyed + .lock() + .map(buffer_id, buffer.life_guard.ref_count.clone()); +} + +#[no_mangle] +pub extern "C" fn wgpu_buffer_unmap( + buffer_id: BufferId, +) { + let mut buffer_guard = HUB.buffers.write(); + let buffer = buffer_guard.get_mut(buffer_id); + let mut device_guard = HUB.devices.write(); + let device = device_guard.get_mut(buffer.device_id.value); + + if !buffer.mapped_write_ranges.is_empty() { + unsafe { device.raw.flush_mapped_memory_ranges( buffer.mapped_write_ranges.iter().map(|r| {(&buffer.memory, r.clone())}) ).unwrap() }; // TODO + buffer.mapped_write_ranges.clear(); + } + + unsafe { device.raw.unmap_memory(&buffer.memory) }; } \ No newline at end of file diff --git a/wgpu-native/src/resource.rs b/wgpu-native/src/resource.rs index 158191e9be..fd6826a917 100644 --- a/wgpu-native/src/resource.rs +++ b/wgpu-native/src/resource.rs @@ -1,6 +1,7 @@ use crate::{ Extent3d, LifeGuard, RefCount, Stored, DeviceId, TextureId, + BufferMapReadCallback, BufferMapWriteCallback, }; use crate::swap_chain::{SwapChainLink, SwapImageEpoch}; @@ -10,7 +11,6 @@ use parking_lot::Mutex; use std::borrow::Borrow; - bitflags! { #[repr(transparent)] pub struct BufferUsageFlags: u32 { @@ -33,12 +33,29 @@ pub struct BufferDescriptor { pub usage: BufferUsageFlags, } +pub enum BufferMapAsyncStatus { + Success, + Error, + Unknown, + ContextLost, +} + +pub(crate) enum BufferMapOperation { + Read(std::ops::Range, BufferMapReadCallback, *mut u8), + Write(std::ops::Range, BufferMapWriteCallback, *mut u8), +} + +unsafe impl Send for BufferMapOperation {} +unsafe impl Sync for BufferMapOperation {} + pub struct Buffer { pub(crate) raw: B::Buffer, pub(crate) device_id: Stored, - //pub memory_properties: hal::memory::Properties, + pub(crate) memory_properties: hal::memory::Properties, + pub(crate) memory: B::Memory, + pub(crate) mapped_write_ranges: Vec>, + pub(crate) pending_map_operation: Option, pub(crate) life_guard: LifeGuard, - // TODO: mapping, unmap() } impl Borrow for Buffer { diff --git a/wgpu-rs/src/lib.rs b/wgpu-rs/src/lib.rs index 036c64d1c1..e3b95692c0 100644 --- a/wgpu-rs/src/lib.rs +++ b/wgpu-rs/src/lib.rs @@ -6,12 +6,13 @@ use arrayvec::ArrayVec; use std::ffi::CString; use std::ops::Range; use std::ptr; +use std::slice; pub use wgn::winit; pub use wgn::{ MAX_DEPTH_BIAS_CLAMP, AdapterDescriptor, BindGroupLayoutBinding, BindingType, - BlendDescriptor, BlendOperation, BlendFactor, ColorWriteFlags, + BlendDescriptor, BlendOperation, BlendFactor, BufferMapAsyncStatus, ColorWriteFlags, RasterizationStateDescriptor, CullMode, FrontFace, BufferDescriptor, BufferUsageFlags, IndexFormat, InputStepMode, ShaderAttributeIndex, VertexAttributeDescriptor, VertexFormat, @@ -454,10 +455,63 @@ impl Drop for Device { } } +pub enum BufferMapAsyncResult { + Success(T), + Error, +} + +struct BufferMapReadAsyncUserData)> { + size: u32, + callback: F, +} + +struct BufferMapWriteAsyncUserData)> { + size: u32, + callback: F, +} + impl Buffer { pub fn set_sub_data(&self, offset: u32, data: &[u8]) { wgn::wgpu_buffer_set_sub_data(self.id, offset, data.len() as u32, data.as_ptr()); } + + pub fn map_read_async(&self, start: u32, size: u32, callback: F) + where F: FnOnce(BufferMapAsyncResult<&[u8]>) { + extern "C" fn buffer_map_read_callback_wrapper(status: wgn::BufferMapAsyncStatus, data: *const u8, userdata: *mut u8) + where F: FnOnce(BufferMapAsyncResult<&[u8]>) { + let userdata = unsafe { Box::from_raw(userdata as *mut BufferMapReadAsyncUserData) }; + let data = unsafe { slice::from_raw_parts(data, userdata.size as usize) }; + if let wgn::BufferMapAsyncStatus::Success = status { + (userdata.callback)(BufferMapAsyncResult::Success(data)); + } else { + (userdata.callback)(BufferMapAsyncResult::Error); + } + } + + let userdata = Box::new(BufferMapReadAsyncUserData{size, callback}); + wgn::wgpu_buffer_map_read_async(self.id, start, size, buffer_map_read_callback_wrapper::, Box::into_raw(userdata) as *mut u8); + } + + pub fn map_write_async(&self, start: u32, size: u32, callback: F) + where F: FnOnce(BufferMapAsyncResult<&mut [u8]>) { + extern "C" fn buffer_map_write_callback_wrapper(status: wgn::BufferMapAsyncStatus, data: *mut u8, userdata: *mut u8) + where F: FnOnce(BufferMapAsyncResult<&mut [u8]>) { + let userdata = unsafe { Box::from_raw(userdata as *mut BufferMapWriteAsyncUserData) }; + let data = unsafe { slice::from_raw_parts_mut(data, userdata.size as usize) }; + if let wgn::BufferMapAsyncStatus::Success = status { + (userdata.callback)(BufferMapAsyncResult::Success(data)); + } else { + (userdata.callback)(BufferMapAsyncResult::Error); + } + } + + let userdata = Box::new(BufferMapWriteAsyncUserData{size, callback}); + wgn::wgpu_buffer_map_write_async(self.id, start, size, buffer_map_write_callback_wrapper::, Box::into_raw(userdata) as *mut u8); + } + + pub fn unmap(&self) { + wgn::wgpu_buffer_unmap(self.id); + } } impl Drop for Buffer {