WebGL backend: memory-only staging buffers (#2112)

* webgl2 buffers

* use bitflags::intersects, linting

* address reviewer comments: s/RawBuffer/BufferInner/

* bugfixes

* cargo fmt

* cargo clippy

* cargo fmt again
This commit is contained in:
Mariusz Kryński
2021-10-27 03:16:31 +02:00
committed by GitHub
parent 9e6af05e94
commit 8568188558
4 changed files with 226 additions and 141 deletions

View File

@@ -201,9 +201,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
if !bar.usage.start.contains(crate::BufferUses::STORAGE_WRITE) {
continue;
}
self.cmd_buffer
.commands
.push(C::BufferBarrier(bar.buffer.raw, bar.usage.end));
self.cmd_buffer.commands.push(C::BufferBarrier(
bar.buffer.inner.as_native().unwrap(),
bar.usage.end,
));
}
}
@@ -238,7 +239,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
self.cmd_buffer.commands.push(C::ClearBuffer {
dst: buffer.raw,
dst: buffer.inner.clone(),
dst_target: buffer.target,
range,
});
@@ -259,9 +260,9 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
};
for copy in regions {
self.cmd_buffer.commands.push(C::CopyBufferToBuffer {
src: src.raw,
src: src.inner.clone(),
src_target,
dst: dst.raw,
dst: dst.inner.clone(),
dst_target,
copy,
})
@@ -300,10 +301,11 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
T: Iterator<Item = crate::BufferTextureCopy>,
{
let (dst_raw, dst_target) = dst.inner.as_native();
for mut copy in regions {
copy.clamp_size_to_virtual(&dst.copy_size);
self.cmd_buffer.commands.push(C::CopyBufferToTexture {
src: src.raw,
src: src.inner.clone(),
src_target: src.target,
dst: dst_raw,
dst_target,
@@ -329,7 +331,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
src: src_raw,
src_target,
src_format: src.format,
dst: dst.raw,
dst: dst.inner.clone(),
dst_target: dst.target,
copy,
})
@@ -366,7 +368,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
let query_range = start as u32..self.cmd_buffer.queries.len() as u32;
self.cmd_buffer.commands.push(C::CopyQueryResults {
query_range,
dst: buffer.raw,
dst: buffer.inner.clone(),
dst_target: buffer.target,
dst_offset: offset,
});
@@ -739,7 +741,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.state.index_format = format;
self.cmd_buffer
.commands
.push(C::SetIndexBuffer(binding.buffer.raw));
.push(C::SetIndexBuffer(binding.buffer.inner.as_native().unwrap()));
}
unsafe fn set_vertex_buffer<'a>(
&mut self,
@@ -749,7 +751,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.state.dirty_vbuf_mask |= 1 << index;
let (_, ref mut vb) = self.state.vertex_buffers[index as usize];
*vb = Some(super::BufferBinding {
raw: binding.buffer.raw,
raw: binding.buffer.inner.as_native().unwrap(),
offset: binding.offset,
});
}
@@ -831,7 +833,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
offset + draw * mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress;
self.cmd_buffer.commands.push(C::DrawIndirect {
topology: self.state.topology,
indirect_buf: buffer.raw,
indirect_buf: buffer.inner.as_native().unwrap(),
indirect_offset,
});
}
@@ -853,7 +855,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.cmd_buffer.commands.push(C::DrawIndexedIndirect {
topology: self.state.topology,
index_type,
indirect_buf: buffer.raw,
indirect_buf: buffer.inner.as_native().unwrap(),
indirect_offset,
});
}
@@ -904,7 +906,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
}
unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) {
self.cmd_buffer.commands.push(C::DispatchIndirect {
indirect_buf: buffer.raw,
indirect_buf: buffer.inner.as_native().unwrap(),
indirect_offset: offset,
});
}

View File

@@ -1,4 +1,4 @@
use super::conv;
use super::{conv, BufferInner};
use crate::auxil::map_naga_stage;
use glow::HasContext;
use std::{convert::TryInto, iter, ptr, sync::Arc};
@@ -314,6 +314,34 @@ impl crate::Device<super::Api> for super::Device {
&self,
desc: &crate::BufferDescriptor,
) -> Result<super::Buffer, crate::DeviceError> {
let target = if desc.usage.contains(crate::BufferUses::INDEX) {
glow::ELEMENT_ARRAY_BUFFER
} else {
glow::ARRAY_BUFFER
};
let emulate_map = self
.shared
.workarounds
.contains(super::Workarounds::EMULATE_BUFFER_MAP)
|| !self
.shared
.private_caps
.contains(super::PrivateCapabilities::BUFFER_ALLOCATION);
if emulate_map
&& desc
.usage
.intersects(crate::BufferUses::MAP_WRITE | crate::BufferUses::MAP_READ)
{
return Ok(super::Buffer {
inner: BufferInner::data_with_capacity(desc.size),
target,
size: desc.size,
map_flags: 0,
});
}
let gl = &self.shared.context.lock();
let target = if desc.usage.contains(crate::BufferUses::INDEX) {
@@ -385,16 +413,17 @@ impl crate::Device<super::Api> for super::Device {
}
Ok(super::Buffer {
raw,
inner: BufferInner::Buffer(raw),
target,
size: desc.size,
map_flags,
emulate_map_allocation: Default::default(),
})
}
unsafe fn destroy_buffer(&self, buffer: super::Buffer) {
let gl = &self.shared.context.lock();
gl.delete_buffer(buffer.raw);
if let BufferInner::Buffer(raw) = buffer.inner {
let gl = &self.shared.context.lock();
gl.delete_buffer(raw);
}
}
unsafe fn map_buffer(
@@ -402,66 +431,48 @@ impl crate::Device<super::Api> for super::Device {
buffer: &super::Buffer,
range: crate::MemoryRange,
) -> Result<crate::BufferMapping, crate::DeviceError> {
let gl = &self.shared.context.lock();
let is_coherent = buffer.map_flags & glow::MAP_COHERENT_BIT != 0;
let ptr = if self
.shared
.workarounds
.contains(super::Workarounds::EMULATE_BUFFER_MAP)
|| !self
.shared
.private_caps
.contains(super::PrivateCapabilities::BUFFER_ALLOCATION)
{
let mut buf = vec![0; buffer.size as usize];
let ptr = buf.as_mut_ptr();
*buffer.emulate_map_allocation.lock().unwrap() = Some(buf);
ptr
} else {
gl.bind_buffer(buffer.target, Some(buffer.raw));
let ptr = gl.map_buffer_range(
buffer.target,
range.start as i32,
(range.end - range.start) as i32,
buffer.map_flags,
);
gl.bind_buffer(buffer.target, None);
ptr
let ptr = match buffer.inner {
BufferInner::Data(ref data) => {
let mut vec = data.lock().unwrap();
let slice = &mut vec.as_mut_slice()[range.start as usize..range.end as usize];
slice.as_mut_ptr()
}
BufferInner::Buffer(raw) => {
let gl = &self.shared.context.lock();
gl.bind_buffer(buffer.target, Some(raw));
let ptr = gl.map_buffer_range(
buffer.target,
range.start as i32,
(range.end - range.start) as i32,
buffer.map_flags,
);
gl.bind_buffer(buffer.target, None);
ptr
}
};
Ok(crate::BufferMapping {
ptr: ptr::NonNull::new(ptr).ok_or(crate::DeviceError::Lost)?,
is_coherent,
})
}
unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> {
let gl = &self.shared.context.lock();
gl.bind_buffer(buffer.target, Some(buffer.raw));
if let Some(buf) = buffer.emulate_map_allocation.lock().unwrap().take() {
gl.buffer_sub_data_u8_slice(buffer.target, 0, &buf);
drop(buf);
} else {
if let BufferInner::Buffer(raw) = buffer.inner {
let gl = &self.shared.context.lock();
gl.bind_buffer(buffer.target, Some(raw));
gl.unmap_buffer(buffer.target);
gl.bind_buffer(buffer.target, None);
}
gl.bind_buffer(buffer.target, None);
Ok(())
}
unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I)
where
I: Iterator<Item = crate::MemoryRange>,
{
let gl = &self.shared.context.lock();
gl.bind_buffer(buffer.target, Some(buffer.raw));
for range in ranges {
if let Some(buf) = buffer.emulate_map_allocation.lock().unwrap().as_ref() {
gl.buffer_sub_data_u8_slice(buffer.target, range.start as i32, buf);
} else {
if let BufferInner::Buffer(raw) = buffer.inner {
let gl = &self.shared.context.lock();
gl.bind_buffer(buffer.target, Some(raw));
for range in ranges {
gl.flush_mapped_buffer_range(
buffer.target,
range.start as i32,
@@ -851,7 +862,7 @@ impl crate::Device<super::Api> for super::Device {
wgt::BindingType::Buffer { .. } => {
let bb = &desc.buffers[entry.resource_index as usize];
super::RawBinding::Buffer {
raw: bb.buffer.raw,
raw: bb.buffer.inner.as_native().unwrap(),
offset: bb.offset as i32,
size: match bb.size {
Some(s) => s.get() as i32,

View File

@@ -210,13 +210,30 @@ pub struct Queue {
current_index_buffer: Option<glow::Buffer>,
}
/// Backing storage of a buffer in this backend.
///
/// A buffer is either a real GL buffer object or a block of bytes kept in
/// host memory. Cloning is cheap in both cases: the `Data` variant clones
/// the `Arc`, so every clone refers to the same underlying byte vector.
#[derive(Debug, Clone)]
pub enum BufferInner {
/// Handle to a GL buffer object.
Buffer(glow::Buffer),
/// Host-memory bytes shared behind a mutex; there is no GL object, so
/// `as_native` returns `None` for this variant.
Data(Arc<std::sync::Mutex<Vec<u8>>>),
}
impl BufferInner {
    /// Builds a host-memory (`Data`) buffer holding `size` zero-initialized bytes.
    ///
    /// Despite the name, the vector's *length* — not merely its capacity — is
    /// `size`, so the whole range can be sliced immediately.
    ///
    /// NOTE(review): `size as usize` truncates silently where `usize` is
    /// narrower than 64 bits — confirm callers never request such sizes.
    pub fn data_with_capacity(size: u64) -> BufferInner {
        let zeroed = vec![0; size as usize];
        Self::Data(Arc::new(std::sync::Mutex::new(zeroed)))
    }

    /// Returns the GL buffer handle when this is the `Buffer` variant, and
    /// `None` when the contents live in host memory (`Data`).
    pub fn as_native(&self) -> Option<glow::Buffer> {
        if let Self::Buffer(handle) = self {
            Some(*handle)
        } else {
            None
        }
    }
}
#[derive(Debug)]
pub struct Buffer {
raw: glow::Buffer,
inner: BufferInner,
target: BindTarget,
size: wgt::BufferAddress,
map_flags: u32,
emulate_map_allocation: std::sync::Mutex<Option<Vec<u8>>>,
}
// Safe: WASM doesn't have threads
@@ -567,14 +584,14 @@ enum Command {
indirect_offset: wgt::BufferAddress,
},
ClearBuffer {
dst: glow::Buffer,
dst: BufferInner,
dst_target: BindTarget,
range: crate::MemoryRange,
},
CopyBufferToBuffer {
src: glow::Buffer,
src: BufferInner,
src_target: BindTarget,
dst: glow::Buffer,
dst: BufferInner,
dst_target: BindTarget,
copy: crate::BufferCopy,
},
@@ -586,7 +603,7 @@ enum Command {
copy: crate::TextureCopy,
},
CopyBufferToTexture {
src: glow::Buffer,
src: BufferInner,
#[allow(unused)]
src_target: BindTarget,
dst: glow::Texture,
@@ -598,7 +615,7 @@ enum Command {
src: glow::Texture,
src_target: BindTarget,
src_format: wgt::TextureFormat,
dst: glow::Buffer,
dst: BufferInner,
#[allow(unused)]
dst_target: BindTarget,
copy: crate::BufferTextureCopy,
@@ -608,7 +625,7 @@ enum Command {
EndQuery(BindTarget),
CopyQueryResults {
query_range: Range<u32>,
dst: glow::Buffer,
dst: BufferInner,
dst_target: BindTarget,
dst_offset: wgt::BufferAddress,
},

View File

@@ -194,32 +194,39 @@ impl super::Queue {
gl.dispatch_compute_indirect(indirect_offset as i32);
}
C::ClearBuffer {
dst,
ref dst,
dst_target,
ref range,
} => {
gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer));
gl.bind_buffer(dst_target, Some(dst));
let mut dst_offset = range.start;
while dst_offset < range.end {
let size = (range.end - dst_offset).min(super::ZERO_BUFFER_SIZE as u64);
gl.copy_buffer_sub_data(
glow::COPY_READ_BUFFER,
dst_target,
0,
dst_offset as i32,
size as i32,
);
dst_offset += size;
} => match *dst {
super::BufferInner::Buffer(buffer) => {
gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer));
gl.bind_buffer(dst_target, Some(buffer));
let mut dst_offset = range.start;
while dst_offset < range.end {
let size = (range.end - dst_offset).min(super::ZERO_BUFFER_SIZE as u64);
gl.copy_buffer_sub_data(
glow::COPY_READ_BUFFER,
dst_target,
0,
dst_offset as i32,
size as i32,
);
dst_offset += size;
}
}
}
super::BufferInner::Data(ref data) => {
data.lock().unwrap().as_mut_slice()[range.start as usize..range.end as usize]
.fill(0);
}
},
C::CopyBufferToBuffer {
src,
ref src,
src_target,
dst,
ref dst,
dst_target,
copy,
} => {
let copy_src_target = glow::COPY_READ_BUFFER;
let is_index_buffer_only_element_dst = !self
.shared
.private_caps
@@ -227,44 +234,52 @@ impl super::Queue {
&& dst_target == glow::ELEMENT_ARRAY_BUFFER
|| src_target == glow::ELEMENT_ARRAY_BUFFER;
let copy_src_target = glow::COPY_READ_BUFFER;
// WebGL does not allow copying data from other targets into an element buffer, nor copying element-buffer data into other buffers
let copy_dst_target = if is_index_buffer_only_element_dst {
glow::ELEMENT_ARRAY_BUFFER
} else {
glow::COPY_WRITE_BUFFER
};
let size = copy.size.get() as usize;
match (src, dst) {
(
&super::BufferInner::Buffer(ref src),
&super::BufferInner::Buffer(ref dst),
) => {
gl.bind_buffer(copy_src_target, Some(*src));
gl.bind_buffer(copy_dst_target, Some(*dst));
gl.copy_buffer_sub_data(
copy_src_target,
copy_dst_target,
copy.src_offset as _,
copy.dst_offset as _,
copy.size.get() as _,
);
}
(&super::BufferInner::Buffer(src), &super::BufferInner::Data(ref data)) => {
let mut data = data.lock().unwrap();
let dst_data = &mut data.as_mut_slice()
[copy.dst_offset as usize..copy.dst_offset as usize + size];
gl.bind_buffer(copy_src_target, Some(src));
gl.bind_buffer(copy_dst_target, Some(dst));
//TODO: remove this slow path completely
// https://github.com/gfx-rs/wgpu/issues/2031
if is_index_buffer_only_element_dst {
let mut buffer_data = vec![0; copy.size.get() as usize];
gl.get_buffer_sub_data(
copy_src_target,
copy.src_offset as i32,
&mut buffer_data,
);
gl.buffer_sub_data_u8_slice(
copy_dst_target,
copy.dst_offset as i32,
&buffer_data,
);
} else {
gl.copy_buffer_sub_data(
copy_src_target,
copy_dst_target,
copy.src_offset as _,
copy.dst_offset as _,
copy.size.get() as _,
);
gl.bind_buffer(copy_src_target, Some(src));
gl.get_buffer_sub_data(copy_src_target, copy.src_offset as i32, dst_data);
}
(&super::BufferInner::Data(ref data), &super::BufferInner::Buffer(dst)) => {
let data = data.lock().unwrap();
let src_data = &data.as_slice()
[copy.src_offset as usize..copy.src_offset as usize + size];
gl.bind_buffer(copy_dst_target, Some(dst));
gl.buffer_sub_data_u8_slice(
copy_dst_target,
copy.dst_offset as i32,
src_data,
);
}
(&super::BufferInner::Data(_), &super::BufferInner::Data(_)) => {
todo!()
}
}
gl.bind_buffer(copy_src_target, None);
if is_index_buffer_only_element_dst {
gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, self.current_index_buffer);
} else {
@@ -327,7 +342,7 @@ impl super::Queue {
}
}
C::CopyBufferToTexture {
src,
ref src,
src_target: _,
dst,
dst_target,
@@ -336,7 +351,6 @@ impl super::Queue {
} => {
let format_info = dst_format.describe();
let format_desc = self.shared.describe_texture_format(dst_format);
let row_texels = copy.buffer_layout.bytes_per_row.map_or(0, |bpr| {
format_info.block_dimensions.0 as u32 * bpr.get()
/ format_info.block_size as u32
@@ -345,14 +359,24 @@ impl super::Queue {
.buffer_layout
.rows_per_image
.map_or(0, |rpi| format_info.block_dimensions.1 as u32 * rpi.get());
gl.bind_texture(dst_target, Some(dst));
gl.pixel_store_i32(glow::UNPACK_ROW_LENGTH, row_texels as i32);
gl.pixel_store_i32(glow::UNPACK_IMAGE_HEIGHT, column_texels as i32);
gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(src));
gl.bind_texture(dst_target, Some(dst));
if format_info.block_dimensions == (1, 1) {
let unpack_data =
glow::PixelUnpackData::BufferOffset(copy.buffer_layout.offset as u32);
let buffer_data;
let unpack_data = match *src {
super::BufferInner::Buffer(buffer) => {
gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer));
glow::PixelUnpackData::BufferOffset(copy.buffer_layout.offset as u32)
}
super::BufferInner::Data(ref data) => {
buffer_data = data.lock().unwrap();
let src_data =
&buffer_data.as_slice()[copy.buffer_layout.offset as usize..];
glow::PixelUnpackData::Slice(src_data)
}
};
match dst_target {
glow::TEXTURE_3D | glow::TEXTURE_2D_ARRAY => {
gl.tex_sub_image_3d(
@@ -383,7 +407,6 @@ impl super::Queue {
);
}
glow::TEXTURE_CUBE_MAP => {
let offset = copy.buffer_layout.offset as u32;
gl.tex_sub_image_2d(
CUBEMAP_FACES[copy.texture_base.array_layer as usize],
copy.texture_base.mip_level as i32,
@@ -393,7 +416,7 @@ impl super::Queue {
copy.size.height as i32,
format_desc.external,
format_desc.data_type,
glow::PixelUnpackData::BufferOffset(offset),
unpack_data,
);
}
glow::TEXTURE_CUBE_MAP_ARRAY => {
@@ -419,9 +442,22 @@ impl super::Queue {
copy.buffer_layout.rows_per_image.map_or(1, |rpi| rpi.get())
* copy.buffer_layout.bytes_per_row.map_or(1, |bpr| bpr.get());
let offset = copy.buffer_layout.offset as u32;
let unpack_data = glow::CompressedPixelUnpackData::BufferRange(
offset..offset + bytes_per_image,
);
let buffer_data;
let unpack_data = match *src {
super::BufferInner::Buffer(buffer) => {
gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer));
glow::CompressedPixelUnpackData::BufferRange(
offset..offset + bytes_per_image,
)
}
super::BufferInner::Data(ref data) => {
buffer_data = data.lock().unwrap();
let src_data = &buffer_data.as_slice()
[(offset as usize)..(offset + bytes_per_image) as usize];
glow::CompressedPixelUnpackData::Slice(src_data)
}
};
match dst_target {
glow::TEXTURE_3D | glow::TEXTURE_2D_ARRAY => {
gl.compressed_tex_sub_image_3d(
@@ -458,9 +494,7 @@ impl super::Queue {
copy.size.width as i32,
copy.size.height as i32,
format_desc.internal,
glow::CompressedPixelUnpackData::BufferRange(
offset..offset + bytes_per_image,
),
unpack_data,
);
}
glow::TEXTURE_CUBE_MAP_ARRAY => {
@@ -486,7 +520,7 @@ impl super::Queue {
src,
src_target,
src_format,
dst,
ref dst,
dst_target: _,
ref copy,
} => {
@@ -508,8 +542,6 @@ impl super::Queue {
.map_or(copy.size.width, |bpr| {
bpr.get() / format_info.block_size as u32
});
gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32);
gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(dst));
gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo));
if is_3d_target(src_target) {
@@ -530,6 +562,20 @@ impl super::Queue {
copy.texture_base.mip_level as i32,
);
}
let mut buffer_data;
let unpack_data = match *dst {
super::BufferInner::Buffer(buffer) => {
gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32);
gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(buffer));
glow::PixelPackData::BufferOffset(copy.buffer_layout.offset as u32)
}
super::BufferInner::Data(ref data) => {
buffer_data = data.lock().unwrap();
let dst_data =
&mut buffer_data.as_mut_slice()[copy.buffer_layout.offset as usize..];
glow::PixelPackData::Slice(dst_data)
}
};
gl.read_pixels(
copy.texture_base.origin.x as i32,
copy.texture_base.origin.y as i32,
@@ -537,7 +583,7 @@ impl super::Queue {
copy.size.height as i32,
format_desc.external,
format_desc.data_type,
glow::PixelPackData::BufferOffset(copy.buffer_layout.offset as u32),
unpack_data,
);
}
C::SetIndexBuffer(buffer) => {
@@ -552,7 +598,7 @@ impl super::Queue {
}
C::CopyQueryResults {
ref query_range,
dst,
ref dst,
dst_target,
dst_offset,
} => {
@@ -565,8 +611,17 @@ impl super::Queue {
self.temp_query_results.as_ptr() as *const u8,
self.temp_query_results.len() * mem::size_of::<u64>(),
);
gl.bind_buffer(dst_target, Some(dst));
gl.buffer_sub_data_u8_slice(dst_target, dst_offset as i32, query_data);
match *dst {
super::BufferInner::Buffer(buffer) => {
gl.bind_buffer(dst_target, Some(buffer));
gl.buffer_sub_data_u8_slice(dst_target, dst_offset as i32, query_data);
}
super::BufferInner::Data(ref data) => {
let data = &mut *data.lock().unwrap();
let len = query_data.len().min(data.len());
data[..len].copy_from_slice(&query_data[..len]);
}
}
}
C::ResetFramebuffer => {
gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo));