From faf8f7e8908735eda3d0a00108ef817ae29d7ff4 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 10 Jun 2021 16:05:44 -0400 Subject: [PATCH] Simple API for coherent mapping --- wgpu-core/src/device/mod.rs | 59 ++++++++++++++++--------------- wgpu-core/src/device/queue.rs | 31 ++++++++-------- wgpu-core/src/resource.rs | 1 - wgpu-hal/examples/halmark/main.rs | 26 +++++++++----- wgpu-hal/src/empty.rs | 2 +- wgpu-hal/src/lib.rs | 8 ++++- wgpu-hal/src/metal/device.rs | 7 ++-- 7 files changed, 76 insertions(+), 58 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 3d2a6b0a4b..7d8632afdb 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -121,20 +121,20 @@ fn map_buffer( size: BufferAddress, kind: HostMap, ) -> Result, resource::BufferAccessError> { - let ptr = unsafe { + let mapping = unsafe { raw.map_buffer(buffer.raw.as_ref().unwrap(), offset..offset + size) .map_err(DeviceError::from)? }; buffer.sync_mapped_writes = match kind { - HostMap::Read if !buffer.is_coherent => unsafe { + HostMap::Read if !mapping.is_coherent => unsafe { raw.invalidate_mapped_ranges( buffer.raw.as_ref().unwrap(), iter::once(offset..offset + size), ); None }, - HostMap::Write if !buffer.is_coherent => Some(offset..offset + size), + HostMap::Write if !mapping.is_coherent => Some(offset..offset + size), _ => None, }; @@ -146,12 +146,15 @@ fn map_buffer( // we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region. // // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. - let zero_init_needs_flush_now = !buffer.is_coherent && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. + let zero_init_needs_flush_now = !mapping.is_coherent && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. 
for uninitialized_range in buffer.initialization_status.drain(offset..(size + offset)) { let num_bytes = uninitialized_range.end - uninitialized_range.start; unsafe { ptr::write_bytes( - ptr.as_ptr().offset(uninitialized_range.start as isize), + mapping + .ptr + .as_ptr() + .offset(uninitialized_range.start as isize), 0, num_bytes as usize, ) @@ -166,7 +169,7 @@ fn map_buffer( } } - Ok(ptr) + Ok(mapping.ptr) } //Note: this logic is specifically moved out of `handle_mapping()` in order to @@ -445,7 +448,6 @@ impl Device { }, usage: desc.usage, size: desc.size, - is_coherent: true, //TODO? initialization_status: MemoryInitTracker::new(desc.size), sync_mapped_writes: None, map_state: resource::BufferMapState::Idle, @@ -2449,8 +2451,8 @@ impl Global { } }; let stage_buffer = stage.raw.unwrap(); - let ptr = match unsafe { device.raw.map_buffer(&stage_buffer, 0..stage.size) } { - Ok(ptr) => ptr, + let mapping = match unsafe { device.raw.map_buffer(&stage_buffer, 0..stage.size) } { + Ok(mapping) => mapping, Err(e) => { let raw = buffer.raw.unwrap(); let mut life_lock = device.lock_life(&mut token); @@ -2466,13 +2468,13 @@ impl Global { // Zero initialize memory and then mark both staging and buffer as initialized // (it's guaranteed that this is the case by the time the buffer is usable) - unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) }; + unsafe { ptr::write_bytes(mapping.ptr.as_ptr(), 0, buffer.size as usize) }; buffer.initialization_status.clear(0..buffer.size); stage.initialization_status.clear(0..buffer.size); buffer.map_state = resource::BufferMapState::Init { - ptr, - needs_flush: !stage.is_coherent, + ptr: mapping.ptr, + needs_flush: !mapping.is_coherent, stage_buffer, }; hal::BufferUse::COPY_DST @@ -2555,21 +2557,21 @@ impl Global { let raw_buf = buffer.raw.as_ref().unwrap(); unsafe { - let ptr = device + let mapping = device .raw - .map_buffer(raw_buf, 0..data.len() as u64) + .map_buffer(raw_buf, offset..offset + data.len() as u64) 
.map_err(DeviceError::from)?; - ptr::copy_nonoverlapping( - data.as_ptr(), - ptr.as_ptr().offset(offset as isize), - data.len(), - ); + ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); + if !mapping.is_coherent { + device + .raw + .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64)); + } device .raw .unmap_buffer(raw_buf) .map_err(DeviceError::from)?; } - //TODO: flush Ok(()) } @@ -2598,18 +2600,19 @@ impl Global { check_buffer_usage(buffer.usage, wgt::BufferUsage::MAP_READ)?; //assert!(buffer isn't used by the GPU); - //TODO: invalidate let raw_buf = buffer.raw.as_ref().unwrap(); unsafe { - let ptr = device + let mapping = device .raw - .map_buffer(raw_buf, 0..data.len() as u64) + .map_buffer(raw_buf, offset..offset + data.len() as u64) .map_err(DeviceError::from)?; - ptr::copy_nonoverlapping( - ptr.as_ptr().offset(offset as isize), - data.as_mut_ptr(), - data.len(), - ); + if !mapping.is_coherent { + device.raw.invalidate_mapped_ranges( + raw_buf, + iter::once(offset..offset + data.len() as u64), + ); + } + ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len()); device .raw .unmap_buffer(raw_buf) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 7cc50d8b63..93002d7d8d 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -26,7 +26,6 @@ use thiserror::Error; struct StagingData { buffer: A::Buffer, cmdbuf: A::CommandBuffer, - is_coherent: bool, } impl StagingData { @@ -36,8 +35,12 @@ impl StagingData { offset: wgt::BufferAddress, data: &[u8], ) -> Result<(), hal::DeviceError> { - let ptr = device.map_buffer(&self.buffer, offset..offset + data.len() as u64)?; - ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), data.len()); + let mapping = device.map_buffer(&self.buffer, offset..offset + data.len() as u64)?; + ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); + if !mapping.is_coherent { + device + 
.flush_mapped_ranges(&self.buffer, iter::once(offset..offset + data.len() as u64)); + } device.unmap_buffer(&self.buffer)?; Ok(()) } @@ -175,11 +178,7 @@ impl super::Device { Some(cmdbuf) => cmdbuf, None => PendingWrites::::create_cmd_buf(&self.raw), }; - Ok(StagingData { - buffer, - cmdbuf, - is_coherent: true, //TODO - }) + Ok(StagingData { buffer, cmdbuf }) } fn initialize_buffer_memory( @@ -474,13 +473,13 @@ impl Global { width_blocks * format_desc.block_size as u32 }; - let ptr = unsafe { device.raw.map_buffer(&stage.buffer, 0..stage_size) } + let mapping = unsafe { device.raw.map_buffer(&stage.buffer, 0..stage_size) } .map_err(DeviceError::from)?; unsafe { profiling::scope!("copy"); if stage_bytes_per_row == bytes_per_row { // Fast path if the data isalready being aligned optimally. - ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), stage_size as usize); + ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), stage_size as usize); } else { // Copy row by row into the optimal alignment. 
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; @@ -490,7 +489,7 @@ impl Global { ptr::copy_nonoverlapping( data.as_ptr() .offset((rows_offset + row) as isize * bytes_per_row as isize), - ptr.as_ptr().offset( + mapping.ptr.as_ptr().offset( (rows_offset + row) as isize * stage_bytes_per_row as isize, ), copy_bytes_per_row, @@ -500,15 +499,15 @@ impl Global { } } unsafe { - device - .raw - .unmap_buffer(&stage.buffer) - .map_err(DeviceError::from)?; - if !stage.is_coherent { + if !mapping.is_coherent { device .raw .flush_mapped_ranges(&stage.buffer, iter::once(0..stage_size)); } + device + .raw + .unmap_buffer(&stage.buffer) + .map_err(DeviceError::from)?; } // WebGPU uses the physical size of the texture for copies whereas vulkan uses diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 767cb890b1..57eed69895 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -122,7 +122,6 @@ pub type BufferDescriptor<'a> = wgt::BufferDescriptor>; pub struct Buffer { pub(crate) raw: Option, pub(crate) device_id: Stored, - pub(crate) is_coherent: bool, pub(crate) usage: wgt::BufferUsage, pub(crate) size: wgt::BufferAddress, pub(crate) initialization_status: MemoryInitTracker, diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index def4fc5a97..1d83370372 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -201,12 +201,16 @@ impl Example { }; let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; unsafe { - let _is_coherent = true; //TODO - let ptr = device + let mapping = device .map_buffer(&staging_buffer, 0..staging_buffer_desc.size) .unwrap(); - ptr::copy_nonoverlapping(texture_data.as_ptr(), ptr.as_ptr(), texture_data.len()); + ptr::copy_nonoverlapping( + texture_data.as_ptr(), + mapping.ptr.as_ptr(), + texture_data.len(), + ); device.unmap_buffer(&staging_buffer).unwrap(); + assert!(mapping.is_coherent); } let 
texture_desc = hal::TextureDescriptor { @@ -298,16 +302,16 @@ impl Example { }; let global_buffer = unsafe { let buffer = device.create_buffer(&global_buffer_desc).unwrap(); - let _is_coherent = true; //TODO - let ptr = device + let mapping = device .map_buffer(&buffer, 0..global_buffer_desc.size) .unwrap(); ptr::copy_nonoverlapping( &globals as *const Globals as *const u8, - ptr.as_ptr(), + mapping.ptr.as_ptr(), mem::size_of::(), ); device.unmap_buffer(&buffer).unwrap(); + assert!(mapping.is_coherent); buffer }; @@ -463,13 +467,17 @@ impl Example { } unsafe { - let _is_coherent = true; //TODO let size = self.bunnies.len() * wgt::BIND_BUFFER_ALIGNMENT as usize; - let ptr = self + let mapping = self .device .map_buffer(&self.local_buffer, 0..size as wgt::BufferAddress) .unwrap(); - ptr::copy_nonoverlapping(self.bunnies.as_ptr() as *const u8, ptr.as_ptr(), size); + ptr::copy_nonoverlapping( + self.bunnies.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + size, + ); + assert!(mapping.is_coherent); self.device.unmap_buffer(&self.local_buffer).unwrap(); } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 9310d6bea3..5c6c1e9821 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -114,7 +114,7 @@ impl crate::Device for Context { &self, buffer: &Resource, range: crate::MemoryRange, - ) -> DeviceResult> { + ) -> DeviceResult { Err(crate::DeviceError::Lost) } unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 9c86d4ad78..223b797780 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -195,7 +195,7 @@ pub trait Device: Send + Sync { &self, buffer: &A::Buffer, range: MemoryRange, - ) -> Result, DeviceError>; + ) -> Result; unsafe fn unmap_buffer(&self, buffer: &A::Buffer) -> Result<(), DeviceError>; unsafe fn flush_mapped_ranges>( &self, @@ -654,6 +654,12 @@ pub struct OpenDevice { pub queue: A::Queue, } +#[derive(Clone, Debug)] +pub struct BufferMapping { + 
pub ptr: NonNull, + pub is_coherent: bool, +} + #[derive(Clone, Debug)] pub struct BufferDescriptor<'a> { pub label: Label<'a>, diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index c6501b880e..3ba11bb6a0 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -165,10 +165,13 @@ impl crate::Device for super::Device { &self, buffer: &super::Buffer, range: crate::MemoryRange, - ) -> DeviceResult> { + ) -> DeviceResult { let ptr = buffer.raw.contents() as *mut u8; assert!(!ptr.is_null()); - Ok(ptr::NonNull::new(ptr.offset(range.start as isize)).unwrap()) + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(ptr.offset(range.start as isize)).unwrap(), + is_coherent: true, + }) } unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> {