Simple API for coherent mapping

This commit is contained in:
Dzmitry Malyshau
2021-06-10 16:05:44 -04:00
parent d88aa99997
commit faf8f7e890
7 changed files with 76 additions and 58 deletions

View File

@@ -121,20 +121,20 @@ fn map_buffer<A: hal::Api>(
size: BufferAddress,
kind: HostMap,
) -> Result<ptr::NonNull<u8>, resource::BufferAccessError> {
let ptr = unsafe {
let mapping = unsafe {
raw.map_buffer(buffer.raw.as_ref().unwrap(), offset..offset + size)
.map_err(DeviceError::from)?
};
buffer.sync_mapped_writes = match kind {
HostMap::Read if !buffer.is_coherent => unsafe {
HostMap::Read if !mapping.is_coherent => unsafe {
raw.invalidate_mapped_ranges(
buffer.raw.as_ref().unwrap(),
iter::once(offset..offset + size),
);
None
},
HostMap::Write if !buffer.is_coherent => Some(offset..offset + size),
HostMap::Write if !mapping.is_coherent => Some(offset..offset + size),
_ => None,
};
@@ -146,12 +146,15 @@ fn map_buffer<A: hal::Api>(
// we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region.
//
// If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways.
let zero_init_needs_flush_now = !buffer.is_coherent && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways.
let zero_init_needs_flush_now = !mapping.is_coherent && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways.
for uninitialized_range in buffer.initialization_status.drain(offset..(size + offset)) {
let num_bytes = uninitialized_range.end - uninitialized_range.start;
unsafe {
ptr::write_bytes(
ptr.as_ptr().offset(uninitialized_range.start as isize),
mapping
.ptr
.as_ptr()
.offset(uninitialized_range.start as isize),
0,
num_bytes as usize,
)
@@ -166,7 +169,7 @@ fn map_buffer<A: hal::Api>(
}
}
Ok(ptr)
Ok(mapping.ptr)
}
//Note: this logic is specifically moved out of `handle_mapping()` in order to
@@ -445,7 +448,6 @@ impl<A: HalApi> Device<A> {
},
usage: desc.usage,
size: desc.size,
is_coherent: true, //TODO?
initialization_status: MemoryInitTracker::new(desc.size),
sync_mapped_writes: None,
map_state: resource::BufferMapState::Idle,
@@ -2449,8 +2451,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
};
let stage_buffer = stage.raw.unwrap();
let ptr = match unsafe { device.raw.map_buffer(&stage_buffer, 0..stage.size) } {
Ok(ptr) => ptr,
let mapping = match unsafe { device.raw.map_buffer(&stage_buffer, 0..stage.size) } {
Ok(mapping) => mapping,
Err(e) => {
let raw = buffer.raw.unwrap();
let mut life_lock = device.lock_life(&mut token);
@@ -2466,13 +2468,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
// Zero initialize memory and then mark both staging and buffer as initialized
// (it's guaranteed that this is the case by the time the buffer is usable)
unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) };
unsafe { ptr::write_bytes(mapping.ptr.as_ptr(), 0, buffer.size as usize) };
buffer.initialization_status.clear(0..buffer.size);
stage.initialization_status.clear(0..buffer.size);
buffer.map_state = resource::BufferMapState::Init {
ptr,
needs_flush: !stage.is_coherent,
ptr: mapping.ptr,
needs_flush: !mapping.is_coherent,
stage_buffer,
};
hal::BufferUse::COPY_DST
@@ -2555,21 +2557,21 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let raw_buf = buffer.raw.as_ref().unwrap();
unsafe {
let ptr = device
let mapping = device
.raw
.map_buffer(raw_buf, 0..data.len() as u64)
.map_buffer(raw_buf, offset..offset + data.len() as u64)
.map_err(DeviceError::from)?;
ptr::copy_nonoverlapping(
data.as_ptr(),
ptr.as_ptr().offset(offset as isize),
data.len(),
);
ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
if !mapping.is_coherent {
device
.raw
.flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64));
}
device
.raw
.unmap_buffer(raw_buf)
.map_err(DeviceError::from)?;
}
//TODO: flush
Ok(())
}
@@ -2598,18 +2600,19 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
check_buffer_usage(buffer.usage, wgt::BufferUsage::MAP_READ)?;
//assert!(buffer isn't used by the GPU);
//TODO: invalidate
let raw_buf = buffer.raw.as_ref().unwrap();
unsafe {
let ptr = device
let mapping = device
.raw
.map_buffer(raw_buf, 0..data.len() as u64)
.map_buffer(raw_buf, offset..offset + data.len() as u64)
.map_err(DeviceError::from)?;
ptr::copy_nonoverlapping(
ptr.as_ptr().offset(offset as isize),
data.as_mut_ptr(),
data.len(),
);
if !mapping.is_coherent {
device.raw.invalidate_mapped_ranges(
raw_buf,
iter::once(offset..offset + data.len() as u64),
);
}
ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len());
device
.raw
.unmap_buffer(raw_buf)

View File

@@ -26,7 +26,6 @@ use thiserror::Error;
struct StagingData<A: hal::Api> {
buffer: A::Buffer,
cmdbuf: A::CommandBuffer,
is_coherent: bool,
}
impl<A: hal::Api> StagingData<A> {
@@ -36,8 +35,12 @@ impl<A: hal::Api> StagingData<A> {
offset: wgt::BufferAddress,
data: &[u8],
) -> Result<(), hal::DeviceError> {
let ptr = device.map_buffer(&self.buffer, offset..offset + data.len() as u64)?;
ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), data.len());
let mapping = device.map_buffer(&self.buffer, offset..offset + data.len() as u64)?;
ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
if !mapping.is_coherent {
device
.flush_mapped_ranges(&self.buffer, iter::once(offset..offset + data.len() as u64));
}
device.unmap_buffer(&self.buffer)?;
Ok(())
}
@@ -175,11 +178,7 @@ impl<A: hal::Api> super::Device<A> {
Some(cmdbuf) => cmdbuf,
None => PendingWrites::<A>::create_cmd_buf(&self.raw),
};
Ok(StagingData {
buffer,
cmdbuf,
is_coherent: true, //TODO
})
Ok(StagingData { buffer, cmdbuf })
}
fn initialize_buffer_memory(
@@ -474,13 +473,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
width_blocks * format_desc.block_size as u32
};
let ptr = unsafe { device.raw.map_buffer(&stage.buffer, 0..stage_size) }
let mapping = unsafe { device.raw.map_buffer(&stage.buffer, 0..stage_size) }
.map_err(DeviceError::from)?;
unsafe {
profiling::scope!("copy");
if stage_bytes_per_row == bytes_per_row {
// Fast path if the data is already being aligned optimally.
ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), stage_size as usize);
ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), stage_size as usize);
} else {
// Copy row by row into the optimal alignment.
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
@@ -490,7 +489,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
ptr::copy_nonoverlapping(
data.as_ptr()
.offset((rows_offset + row) as isize * bytes_per_row as isize),
ptr.as_ptr().offset(
mapping.ptr.as_ptr().offset(
(rows_offset + row) as isize * stage_bytes_per_row as isize,
),
copy_bytes_per_row,
@@ -500,15 +499,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}
unsafe {
device
.raw
.unmap_buffer(&stage.buffer)
.map_err(DeviceError::from)?;
if !stage.is_coherent {
if !mapping.is_coherent {
device
.raw
.flush_mapped_ranges(&stage.buffer, iter::once(0..stage_size));
}
device
.raw
.unmap_buffer(&stage.buffer)
.map_err(DeviceError::from)?;
}
// WebGPU uses the physical size of the texture for copies whereas vulkan uses

View File

@@ -122,7 +122,6 @@ pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
pub struct Buffer<A: hal::Api> {
pub(crate) raw: Option<A::Buffer>,
pub(crate) device_id: Stored<DeviceId>,
pub(crate) is_coherent: bool,
pub(crate) usage: wgt::BufferUsage,
pub(crate) size: wgt::BufferAddress,
pub(crate) initialization_status: MemoryInitTracker,

View File

@@ -201,12 +201,16 @@ impl<A: hal::Api> Example<A> {
};
let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() };
unsafe {
let _is_coherent = true; //TODO
let ptr = device
let mapping = device
.map_buffer(&staging_buffer, 0..staging_buffer_desc.size)
.unwrap();
ptr::copy_nonoverlapping(texture_data.as_ptr(), ptr.as_ptr(), texture_data.len());
ptr::copy_nonoverlapping(
texture_data.as_ptr(),
mapping.ptr.as_ptr(),
texture_data.len(),
);
device.unmap_buffer(&staging_buffer).unwrap();
assert!(mapping.is_coherent);
}
let texture_desc = hal::TextureDescriptor {
@@ -298,16 +302,16 @@ impl<A: hal::Api> Example<A> {
};
let global_buffer = unsafe {
let buffer = device.create_buffer(&global_buffer_desc).unwrap();
let _is_coherent = true; //TODO
let ptr = device
let mapping = device
.map_buffer(&buffer, 0..global_buffer_desc.size)
.unwrap();
ptr::copy_nonoverlapping(
&globals as *const Globals as *const u8,
ptr.as_ptr(),
mapping.ptr.as_ptr(),
mem::size_of::<Globals>(),
);
device.unmap_buffer(&buffer).unwrap();
assert!(mapping.is_coherent);
buffer
};
@@ -463,13 +467,17 @@ impl<A: hal::Api> Example<A> {
}
unsafe {
let _is_coherent = true; //TODO
let size = self.bunnies.len() * wgt::BIND_BUFFER_ALIGNMENT as usize;
let ptr = self
let mapping = self
.device
.map_buffer(&self.local_buffer, 0..size as wgt::BufferAddress)
.unwrap();
ptr::copy_nonoverlapping(self.bunnies.as_ptr() as *const u8, ptr.as_ptr(), size);
ptr::copy_nonoverlapping(
self.bunnies.as_ptr() as *const u8,
mapping.ptr.as_ptr(),
size,
);
assert!(mapping.is_coherent);
self.device.unmap_buffer(&self.local_buffer).unwrap();
}

View File

@@ -114,7 +114,7 @@ impl crate::Device<Api> for Context {
&self,
buffer: &Resource,
range: crate::MemoryRange,
) -> DeviceResult<std::ptr::NonNull<u8>> {
) -> DeviceResult<crate::BufferMapping> {
Err(crate::DeviceError::Lost)
}
unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> {

View File

@@ -195,7 +195,7 @@ pub trait Device<A: Api>: Send + Sync {
&self,
buffer: &A::Buffer,
range: MemoryRange,
) -> Result<NonNull<u8>, DeviceError>;
) -> Result<BufferMapping, DeviceError>;
unsafe fn unmap_buffer(&self, buffer: &A::Buffer) -> Result<(), DeviceError>;
unsafe fn flush_mapped_ranges<I: Iterator<Item = MemoryRange>>(
&self,
@@ -654,6 +654,12 @@ pub struct OpenDevice<A: Api> {
pub queue: A::Queue,
}
#[derive(Clone, Debug)]
pub struct BufferMapping {
pub ptr: NonNull<u8>,
pub is_coherent: bool,
}
#[derive(Clone, Debug)]
pub struct BufferDescriptor<'a> {
pub label: Label<'a>,

View File

@@ -165,10 +165,13 @@ impl crate::Device<super::Api> for super::Device {
&self,
buffer: &super::Buffer,
range: crate::MemoryRange,
) -> DeviceResult<ptr::NonNull<u8>> {
) -> DeviceResult<crate::BufferMapping> {
let ptr = buffer.raw.contents() as *mut u8;
assert!(!ptr.is_null());
Ok(ptr::NonNull::new(ptr.offset(range.start as isize)).unwrap())
Ok(crate::BufferMapping {
ptr: ptr::NonNull::new(ptr.offset(range.start as isize)).unwrap(),
is_coherent: true,
})
}
unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> {