diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs
index 5a060e1344..1a7983c5ce 100644
--- a/wgpu-core/src/command/allocator.rs
+++ b/wgpu-core/src/command/allocator.rs
@@ -4,14 +4,13 @@
 use super::CommandBuffer;
 use crate::{
-    hub::GfxBackend, id::DeviceId, track::TrackerSet, LifeGuard, PrivateFeatures, Stored,
-    SubmissionIndex,
+    hub::GfxBackend, id::DeviceId, track::TrackerSet, PrivateFeatures, Stored, SubmissionIndex,
 };
 
 use hal::{command::CommandBuffer as _, device::Device as _, pool::CommandPool as _};
 use parking_lot::Mutex;
 
-use std::{collections::HashMap, sync::atomic::Ordering, thread};
+use std::{collections::HashMap, thread};
 
 const GROW_AMOUNT: usize = 20;
 
@@ -20,21 +19,17 @@ struct CommandPool<B: hal::Backend> {
     raw: B::CommandPool,
     total: usize,
     available: Vec<B::CommandBuffer>,
-    pending: Vec<CommandBuffer<B>>,
+    pending: Vec<(B::CommandBuffer, SubmissionIndex)>,
 }
 
 impl<B: hal::Backend> CommandPool<B> {
     fn maintain(&mut self, lowest_active_index: SubmissionIndex) {
         for i in (0..self.pending.len()).rev() {
-            let index = self.pending[i]
-                .life_guard
-                .submission_index
-                .load(Ordering::Acquire);
-            if index < lowest_active_index {
-                let cmd_buf = self.pending.swap_remove(i);
+            if self.pending[i].1 < lowest_active_index {
+                let (cmd_buf, index) = self.pending.swap_remove(i);
                 log::trace!(
                     "recycling comb submitted in {} when {} is lowest active",
                     index,
                     lowest_active_index,
                 );
                 self.recycle(cmd_buf);
@@ -42,13 +37,11 @@ impl<B: hal::Backend> CommandPool<B> {
         }
     }
 
-    fn recycle(&mut self, cmd_buf: CommandBuffer<B>) {
-        for mut raw in cmd_buf.raw {
-            unsafe {
-                raw.reset(false);
-            }
-            self.available.push(raw);
+    fn recycle(&mut self, mut raw: B::CommandBuffer) {
+        unsafe {
+            raw.reset(false);
         }
+        self.available.push(raw);
     }
 
     fn allocate(&mut self) -> B::CommandBuffer {
@@ -113,7 +106,6 @@ impl<B: GfxBackend> CommandAllocator<B> {
             is_recording: true,
             recorded_thread_id: thread_id,
             device_id,
-            life_guard: LifeGuard::new(),
             trackers: TrackerSet::new(B::VARIANT),
             used_swap_chain: None,
             limits,
@@ -138,32 +130,39 @@ impl 
CommandAllocator { } } - pub fn extend(&self, cmd_buf: &CommandBuffer) -> B::CommandBuffer { + fn allocate_for_thread_id(&self, thread_id: thread::ThreadId) -> B::CommandBuffer { let mut inner = self.inner.lock(); - inner - .pools - .get_mut(&cmd_buf.recorded_thread_id) - .unwrap() - .allocate() + inner.pools.get_mut(&thread_id).unwrap().allocate() + } + + pub fn allocate_internal(&self) -> B::CommandBuffer { + self.allocate_for_thread_id(thread::current().id()) + } + + pub fn extend(&self, cmd_buf: &CommandBuffer) -> B::CommandBuffer { + self.allocate_for_thread_id(cmd_buf.recorded_thread_id) } pub fn discard(&self, mut cmd_buf: CommandBuffer) { cmd_buf.trackers.clear(); let mut inner = self.inner.lock(); - inner - .pools - .get_mut(&cmd_buf.recorded_thread_id) - .unwrap() - .recycle(cmd_buf); + let pool = inner.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap(); + for raw in cmd_buf.raw { + pool.recycle(raw); + } } - pub fn after_submit(&self, mut cmd_buf: CommandBuffer, submit_index: SubmissionIndex) { - cmd_buf.trackers.clear(); - cmd_buf - .life_guard - .submission_index - .store(submit_index, Ordering::Release); + pub fn after_submit_internal(&self, raw: B::CommandBuffer, submit_index: SubmissionIndex) { + let mut inner = self.inner.lock(); + inner + .pools + .get_mut(&thread::current().id()) + .unwrap() + .pending + .push((raw, submit_index)); + } + pub fn after_submit(&self, cmd_buf: CommandBuffer, submit_index: SubmissionIndex) { // Record this command buffer as pending let mut inner = self.inner.lock(); inner @@ -171,7 +170,7 @@ impl CommandAllocator { .get_mut(&cmd_buf.recorded_thread_id) .unwrap() .pending - .push(cmd_buf); + .extend(cmd_buf.raw.into_iter().map(|raw| (raw, submit_index))); } pub fn maintain(&self, device: &B::Device, lowest_active_index: SubmissionIndex) { @@ -197,8 +196,8 @@ impl CommandAllocator { pub fn destroy(self, device: &B::Device) { let mut inner = self.inner.lock(); for (_, mut pool) in inner.pools.drain() { - while let 
Some(cmd_buf) = pool.pending.pop() { - pool.recycle(cmd_buf); + while let Some((raw, _)) = pool.pending.pop() { + pool.recycle(raw); } if pool.total != pool.available.len() { log::error!( diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 63a3b61e9e..ea8ed0e5c5 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -19,7 +19,7 @@ use crate::{ id, resource::{Buffer, Texture}, track::TrackerSet, - LifeGuard, PrivateFeatures, Stored, + PrivateFeatures, Stored, }; use peek_poke::PeekPoke; @@ -157,7 +157,6 @@ pub struct CommandBuffer { is_recording: bool, recorded_thread_id: ThreadId, pub(crate) device_id: Stored, - pub(crate) life_guard: LifeGuard, pub(crate) trackers: TrackerSet, pub(crate) used_swap_chain: Option<(Stored, B::Framebuffer)>, limits: wgt::Limits, diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 5906e57920..e6e41b00e4 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -15,7 +15,6 @@ use copyless::VecHelper as _; use gfx_descriptor::DescriptorAllocator; use gfx_memory::{Block, Heaps}; use hal::{ - self, command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _, @@ -202,6 +201,7 @@ pub struct Device { pub(crate) private_features: PrivateFeatures, limits: wgt::Limits, extensions: wgt::Extensions, + pending_write_command_buffer: Option, #[cfg(feature = "trace")] pub(crate) trace: Option>, } @@ -273,6 +273,7 @@ impl Device { }, limits: desc.limits.clone(), extensions: desc.extensions.clone(), + pending_write_command_buffer: None, } } @@ -1683,6 +1684,116 @@ impl Global { self.command_encoder_destroy::(command_buffer_id) } + pub fn queue_write_buffer( + &self, + queue_id: id::QueueId, + data: &[u8], + buffer_id: id::BufferId, + buffer_offset: BufferAddress, + ) { + let hub = B::hub(self); + let mut token = Token::root(); + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = &mut device_guard[queue_id]; + let 
(buffer_guard, _) = hub.buffers.read(&mut token);
+
+        #[cfg(feature = "trace")]
+        match device.trace {
+            Some(ref trace) => {
+                let mut trace = trace.lock();
+                let data_path = trace.make_binary("bin", data);
+                trace.add(trace::Action::WriteBuffer {
+                    id: buffer_id,
+                    data: data_path,
+                    range: buffer_offset..buffer_offset + data.len() as BufferAddress,
+                });
+            }
+            None => {}
+        }
+
+        let mut trackers = device.trackers.lock();
+        let (dst, transition) = trackers.buffers.use_replace(
+            &*buffer_guard,
+            buffer_id,
+            (),
+            resource::BufferUse::COPY_DST,
+        );
+        assert!(
+            dst.usage.contains(wgt::BufferUsage::COPY_DST),
+            "Write buffer usage {:?} must contain usage flag COPY_DST",
+            dst.usage
+        );
+
+        let submit_index = 1 + device.life_guard.submission_index.load(Ordering::Relaxed);
+        if !dst.life_guard.use_at(submit_index) {
+            device.temp_suspected.buffers.push(buffer_id);
+        }
+
+        let src_raw = unsafe {
+            let mut buf = device
+                .raw
+                .create_buffer(
+                    data.len() as BufferAddress,
+                    hal::buffer::Usage::TRANSFER_SRC,
+                )
+                .unwrap();
+            device.raw.set_buffer_name(&mut buf, "");
+            buf
+        };
+        //TODO: do we need to transition into HOST_WRITE access first? 
+ let requirements = unsafe { device.raw.get_buffer_requirements(&src_raw) }; + + let mut memory = device + .mem_allocator + .lock() + .allocate( + &device.raw, + requirements.type_mask as u32, + gfx_memory::MemoryUsage::Staging { read_back: false }, + gfx_memory::Kind::Linear, + requirements.size, + requirements.alignment, + ) + .unwrap(); + + let mut mapped = memory.map(&device.raw, hal::memory::Segment::ALL).unwrap(); + unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) } + .unwrap() + .slice + .copy_from_slice(data); + + let mut comb = match device.pending_write_command_buffer.take() { + Some(comb) => comb, + None => { + let mut comb = device.com_allocator.allocate_internal(); + unsafe { + comb.begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT); + } + comb + } + }; + let region = hal::command::BufferCopy { + src: 0, + dst: buffer_offset, + size: data.len() as _, + }; + unsafe { + comb.pipeline_barrier( + all_buffer_stages()..all_buffer_stages(), + hal::memory::Dependencies::empty(), + iter::once(hal::memory::Barrier::Buffer { + states: hal::buffer::Access::HOST_WRITE..hal::buffer::Access::TRANSFER_READ, + target: &src_raw, + range: hal::buffer::SubRange::WHOLE, + families: None, + }) + .chain(transition.map(|pending| pending.into_hal(dst))), + ); + comb.copy_buffer(&src_raw, &dst.raw, iter::once(region)); + } + device.pending_write_command_buffer = Some(comb); + } + pub fn queue_submit( &self, queue_id: id::QueueId, @@ -1694,6 +1805,7 @@ impl Global { let mut token = Token::root(); let (mut device_guard, mut token) = hub.devices.write(&mut token); let device = &mut device_guard[queue_id]; + let pending_write_command_buffer = device.pending_write_command_buffer.take(); device.temp_suspected.clear(); let submit_index = 1 + device @@ -1815,9 +1927,11 @@ impl Global { // now prepare the GPU submission let fence = device.raw.create_fence(false).unwrap(); let submission = hal::queue::Submission { - command_buffers: command_buffer_ids - 
.iter() - .flat_map(|&cmb_id| &command_buffer_guard[cmb_id].raw), + command_buffers: pending_write_command_buffer.as_ref().into_iter().chain( + command_buffer_ids + .iter() + .flat_map(|&cmb_id| &command_buffer_guard[cmb_id].raw), + ), wait_semaphores: Vec::new(), signal_semaphores: signal_swapchain_semaphores .into_iter() @@ -1828,6 +1942,11 @@ impl Global { device.queue_group.queues[0].submit(submission, Some(&fence)); } + if let Some(comb_raw) = pending_write_command_buffer { + device + .com_allocator + .after_submit_internal(comb_raw, submit_index); + } (submit_index, fence) };