diff --git a/player/src/main.rs b/player/src/main.rs index b864edb0ec..94aecbfc79 100644 --- a/player/src/main.rs +++ b/player/src/main.rs @@ -402,11 +402,20 @@ impl GlobalExt for wgc::hub::Global { A::DestroyRenderPipeline(id) => { self.render_pipeline_destroy::(id); } - A::WriteBuffer { id, data, range } => { + A::WriteBuffer { + id, + data, + range, + queued, + } => { let bin = std::fs::read(dir.join(data)).unwrap(); let size = (range.end - range.start) as usize; - self.device_wait_for_buffer::(device, id); - self.device_set_buffer_sub_data::(device, id, range.start, &bin[..size]); + if queued { + self.queue_write_buffer::(device, &bin, id, range.start); + } else { + self.device_wait_for_buffer::(device, id); + self.device_set_buffer_sub_data::(device, id, range.start, &bin[..size]); + } } A::Submit(_index, commands) => { let encoder = self.device_create_command_encoder::( @@ -446,7 +455,7 @@ fn main() { log::info!("Found {} actions", actions.len()); #[cfg(feature = "winit")] - let mut event_loop = { + let event_loop = { log::info!("Creating a window"); EventLoop::new() }; @@ -514,7 +523,6 @@ fn main() { use winit::{ event::{ElementState, Event, KeyboardInput, VirtualKeyCode, WindowEvent}, event_loop::ControlFlow, - platform::desktop::EventLoopExtDesktop, }; let mut frame_count = 0; diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs index 5a060e1344..07fd7c2e4e 100644 --- a/wgpu-core/src/command/allocator.rs +++ b/wgpu-core/src/command/allocator.rs @@ -4,14 +4,14 @@ use super::CommandBuffer; use crate::{ - hub::GfxBackend, id::DeviceId, track::TrackerSet, LifeGuard, PrivateFeatures, Stored, + hub::GfxBackend, id::DeviceId, track::TrackerSet, FastHashMap, PrivateFeatures, Stored, SubmissionIndex, }; use hal::{command::CommandBuffer as _, device::Device as _, pool::CommandPool as _}; use parking_lot::Mutex; -use std::{collections::HashMap, sync::atomic::Ordering, thread}; +use std::thread; const GROW_AMOUNT: usize = 20; @@ -20,21 
+20,17 @@ struct CommandPool { raw: B::CommandPool, total: usize, available: Vec, - pending: Vec>, + pending: Vec<(B::CommandBuffer, SubmissionIndex)>, } impl CommandPool { fn maintain(&mut self, lowest_active_index: SubmissionIndex) { for i in (0..self.pending.len()).rev() { - let index = self.pending[i] - .life_guard - .submission_index - .load(Ordering::Acquire); - if index < lowest_active_index { - let cmd_buf = self.pending.swap_remove(i); + if self.pending[i].1 < lowest_active_index { + let (cmd_buf, index) = self.pending.swap_remove(i); log::trace!( "recycling comb submitted in {} when {} is lowest active", - index, + index, lowest_active_index, ); self.recycle(cmd_buf); @@ -42,13 +38,11 @@ impl CommandPool { } } - fn recycle(&mut self, cmd_buf: CommandBuffer) { - for mut raw in cmd_buf.raw { - unsafe { - raw.reset(false); - } - self.available.push(raw); + fn recycle(&mut self, mut raw: B::CommandBuffer) { + unsafe { + raw.reset(false); } + self.available.push(raw); } fn allocate(&mut self) -> B::CommandBuffer { @@ -68,12 +62,13 @@ impl CommandPool { #[derive(Debug)] struct Inner { - pools: HashMap>, + pools: FastHashMap>, } #[derive(Debug)] pub struct CommandAllocator { queue_family: hal::queue::QueueFamilyId, + internal_thread_id: thread::ThreadId, inner: Mutex>, } @@ -113,7 +108,6 @@ impl CommandAllocator { is_recording: true, recorded_thread_id: thread_id, device_id, - life_guard: LifeGuard::new(), trackers: TrackerSet::new(B::VARIANT), used_swap_chain: None, limits, @@ -129,41 +123,75 @@ impl CommandAllocator { } impl CommandAllocator { - pub fn new(queue_family: hal::queue::QueueFamilyId) -> Self { + pub fn new(queue_family: hal::queue::QueueFamilyId, device: &B::Device) -> Self { + let internal_thread_id = thread::current().id(); + log::info!("Starting on (internal) thread {:?}", internal_thread_id); + let mut pools = FastHashMap::default(); + pools.insert( + internal_thread_id, + CommandPool { + raw: unsafe { + device + .create_command_pool( + 
queue_family, + hal::pool::CommandPoolCreateFlags::RESET_INDIVIDUAL, + ) + .unwrap() + }, + total: 0, + available: Vec::new(), + pending: Vec::new(), + }, + ); CommandAllocator { queue_family, - inner: Mutex::new(Inner { - pools: HashMap::new(), - }), + internal_thread_id, + inner: Mutex::new(Inner { pools }), } } + fn allocate_for_thread_id(&self, thread_id: thread::ThreadId) -> B::CommandBuffer { + let mut inner = self.inner.lock(); + inner.pools.get_mut(&thread_id).unwrap().allocate() + } + + pub fn allocate_internal(&self) -> B::CommandBuffer { + self.allocate_for_thread_id(self.internal_thread_id) + } + pub fn extend(&self, cmd_buf: &CommandBuffer) -> B::CommandBuffer { + self.allocate_for_thread_id(cmd_buf.recorded_thread_id) + } + + pub fn discard_internal(&self, raw: B::CommandBuffer) { let mut inner = self.inner.lock(); inner .pools - .get_mut(&cmd_buf.recorded_thread_id) + .get_mut(&self.internal_thread_id) .unwrap() - .allocate() + .recycle(raw); } pub fn discard(&self, mut cmd_buf: CommandBuffer) { cmd_buf.trackers.clear(); let mut inner = self.inner.lock(); - inner - .pools - .get_mut(&cmd_buf.recorded_thread_id) - .unwrap() - .recycle(cmd_buf); + let pool = inner.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap(); + for raw in cmd_buf.raw { + pool.recycle(raw); + } } - pub fn after_submit(&self, mut cmd_buf: CommandBuffer, submit_index: SubmissionIndex) { - cmd_buf.trackers.clear(); - cmd_buf - .life_guard - .submission_index - .store(submit_index, Ordering::Release); + pub fn after_submit_internal(&self, raw: B::CommandBuffer, submit_index: SubmissionIndex) { + let mut inner = self.inner.lock(); + inner + .pools + .get_mut(&self.internal_thread_id) + .unwrap() + .pending + .push((raw, submit_index)); + } + pub fn after_submit(&self, cmd_buf: CommandBuffer, submit_index: SubmissionIndex) { // Record this command buffer as pending let mut inner = self.inner.lock(); inner @@ -171,7 +199,7 @@ impl CommandAllocator { .get_mut(&cmd_buf.recorded_thread_id) 
.unwrap() .pending - .push(cmd_buf); + .extend(cmd_buf.raw.into_iter().map(|raw| (raw, submit_index))); } pub fn maintain(&self, device: &B::Device, lowest_active_index: SubmissionIndex) { @@ -197,8 +225,8 @@ impl CommandAllocator { pub fn destroy(self, device: &B::Device) { let mut inner = self.inner.lock(); for (_, mut pool) in inner.pools.drain() { - while let Some(cmd_buf) = pool.pending.pop() { - pool.recycle(cmd_buf); + while let Some((raw, _)) = pool.pending.pop() { + pool.recycle(raw); } if pool.total != pool.available.len() { log::error!( diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 63a3b61e9e..ea8ed0e5c5 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -19,7 +19,7 @@ use crate::{ id, resource::{Buffer, Texture}, track::TrackerSet, - LifeGuard, PrivateFeatures, Stored, + PrivateFeatures, Stored, }; use peek_poke::PeekPoke; @@ -157,7 +157,6 @@ pub struct CommandBuffer { is_recording: bool, recorded_thread_id: ThreadId, pub(crate) device_id: Stored, - pub(crate) life_guard: LifeGuard, pub(crate) trackers: TrackerSet, pub(crate) used_swap_chain: Option<(Stored, B::Framebuffer)>, limits: wgt::Limits, diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 10b2f4959d..25fdf9299a 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -212,12 +212,15 @@ impl LifetimeTracker { index: SubmissionIndex, fence: B::Fence, new_suspects: &SuspectedResources, + temp_buffers: impl Iterator)>, ) { + let mut last_resources = NonReferencedResources::new(); + last_resources.buffers.extend(temp_buffers); self.suspected_resources.extend(new_suspects); self.active.alloc().init(ActiveSubmission { index, fence, - last_resources: NonReferencedResources::new(), + last_resources, mapped: Vec::new(), }); } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 5906e57920..968c4abeba 100644 --- a/wgpu-core/src/device/mod.rs +++ 
b/wgpu-core/src/device/mod.rs @@ -15,14 +15,11 @@ use copyless::VecHelper as _; use gfx_descriptor::DescriptorAllocator; use gfx_memory::{Block, Heaps}; use hal::{ - self, command::CommandBuffer as _, device::Device as _, - queue::CommandQueue as _, window::{PresentationSurface as _, Surface as _}, }; use parking_lot::{Mutex, MutexGuard}; -use smallvec::SmallVec; use wgt::{BufferAddress, InputStepMode, TextureDimension, TextureFormat, BIND_BUFFER_ALIGNMENT}; use std::{ @@ -33,8 +30,10 @@ use std::{ use spirv_headers::ExecutionModel; mod life; +mod queue; #[cfg(any(feature = "trace", feature = "replay"))] pub mod trace; + #[cfg(feature = "trace")] use trace::{Action, Trace}; @@ -202,6 +201,7 @@ pub struct Device { pub(crate) private_features: PrivateFeatures, limits: wgt::Limits, extensions: wgt::Extensions, + pending_writes: queue::PendingWrites, #[cfg(feature = "trace")] pub(crate) trace: Option>, } @@ -221,6 +221,7 @@ impl Device { let life_guard = LifeGuard::new(); life_guard.submission_index.fetch_add(1, Ordering::Relaxed); + let com_allocator = command::CommandAllocator::new(queue_group.family, &raw); let heaps = unsafe { Heaps::new( &mem_props, @@ -230,7 +231,7 @@ impl Device { min_device_allocation: 0x1_0000, }, gfx_memory::LinearConfig { - linear_size: 0x10_0000, + linear_size: 0x100_0000, }, non_coherent_atom_size, ) @@ -244,7 +245,7 @@ impl Device { Device { raw, adapter_id, - com_allocator: command::CommandAllocator::new(queue_group.family), + com_allocator, mem_allocator: Mutex::new(heaps), desc_allocator: Mutex::new(DescriptorAllocator::new()), queue_group, @@ -273,14 +274,22 @@ impl Device { }, limits: desc.limits.clone(), extensions: desc.extensions.clone(), + pending_writes: queue::PendingWrites::new(), } } + fn lock_life_internal<'this, 'token: 'this>( + tracker: &'this Mutex>, + _token: &mut Token<'token, Self>, + ) -> MutexGuard<'this, life::LifetimeTracker> { + tracker.lock() + } + fn lock_life<'this, 'token: 'this>( &'this self, - _token: &mut 
Token<'token, Self>, + token: &mut Token<'token, Self>, ) -> MutexGuard<'this, life::LifetimeTracker> { - self.life_tracker.lock() + Self::lock_life_internal(&self.life_tracker, token) } fn maintain<'this, 'token: 'this, G: GlobalIdentityHandlerFactory>( @@ -510,9 +519,11 @@ impl Device { } pub(crate) fn dispose(self) { - self.com_allocator.destroy(&self.raw); let mut desc_alloc = self.desc_allocator.into_inner(); let mut mem_alloc = self.mem_allocator.into_inner(); + self.pending_writes + .dispose(&self.raw, &self.com_allocator, &mut mem_alloc); + self.com_allocator.destroy(&self.raw); unsafe { desc_alloc.clear(&self.raw); mem_alloc.clear(&self.raw); @@ -687,6 +698,7 @@ impl Global { id: buffer_id, data: data_path, range: offset..offset + data.len() as BufferAddress, + queued: false, }); } None => (), @@ -1683,179 +1695,6 @@ impl Global { self.command_encoder_destroy::(command_buffer_id) } - pub fn queue_submit( - &self, - queue_id: id::QueueId, - command_buffer_ids: &[id::CommandBufferId], - ) { - let hub = B::hub(self); - - let (submit_index, fence) = { - let mut token = Token::root(); - let (mut device_guard, mut token) = hub.devices.write(&mut token); - let device = &mut device_guard[queue_id]; - device.temp_suspected.clear(); - - let submit_index = 1 + device - .life_guard - .submission_index - .fetch_add(1, Ordering::Relaxed); - - let (mut swap_chain_guard, mut token) = hub.swap_chains.write(&mut token); - let (mut command_buffer_guard, mut token) = hub.command_buffers.write(&mut token); - let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); - let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); - let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); - let (mut buffer_guard, mut token) = hub.buffers.write(&mut token); - let (texture_guard, mut token) = hub.textures.read(&mut token); - let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); - let (sampler_guard, _) = 
hub.samplers.read(&mut token); - - //Note: locking the trackers has to be done after the storages - let mut signal_swapchain_semaphores = SmallVec::<[_; 1]>::new(); - let mut trackers = device.trackers.lock(); - - //TODO: if multiple command buffers are submitted, we can re-use the last - // native command buffer of the previous chain instead of always creating - // a temporary one, since the chains are not finished. - - // finish all the command buffers first - for &cmb_id in command_buffer_ids { - let comb = &mut command_buffer_guard[cmb_id]; - #[cfg(feature = "trace")] - match device.trace { - Some(ref trace) => trace - .lock() - .add(Action::Submit(submit_index, comb.commands.take().unwrap())), - None => (), - }; - - if let Some((sc_id, fbo)) = comb.used_swap_chain.take() { - let sc = &mut swap_chain_guard[sc_id.value]; - assert!(sc.acquired_view_id.is_some(), - "SwapChainOutput for {:?} was dropped before the respective command buffer {:?} got submitted!", - sc_id.value, cmb_id); - if sc.acquired_framebuffers.is_empty() { - signal_swapchain_semaphores.push(sc_id.value); - } - sc.acquired_framebuffers.push(fbo); - } - - // optimize the tracked states - comb.trackers.optimize(); - - // update submission IDs - for id in comb.trackers.buffers.used() { - if let resource::BufferMapState::Waiting(_) = buffer_guard[id].map_state { - panic!("Buffer has a pending mapping."); - } - if !buffer_guard[id].life_guard.use_at(submit_index) { - if let resource::BufferMapState::Active { .. 
} = buffer_guard[id].map_state - { - log::warn!("Dropped buffer has a pending mapping."); - unmap_buffer(&device.raw, &mut buffer_guard[id]); - } - device.temp_suspected.buffers.push(id); - } - } - for id in comb.trackers.textures.used() { - if !texture_guard[id].life_guard.use_at(submit_index) { - device.temp_suspected.textures.push(id); - } - } - for id in comb.trackers.views.used() { - if !texture_view_guard[id].life_guard.use_at(submit_index) { - device.temp_suspected.texture_views.push(id); - } - } - for id in comb.trackers.bind_groups.used() { - if !bind_group_guard[id].life_guard.use_at(submit_index) { - device.temp_suspected.bind_groups.push(id); - } - } - for id in comb.trackers.samplers.used() { - if !sampler_guard[id].life_guard.use_at(submit_index) { - device.temp_suspected.samplers.push(id); - } - } - for id in comb.trackers.compute_pipes.used() { - if !compute_pipe_guard[id].life_guard.use_at(submit_index) { - device.temp_suspected.compute_pipelines.push(id); - } - } - for id in comb.trackers.render_pipes.used() { - if !render_pipe_guard[id].life_guard.use_at(submit_index) { - device.temp_suspected.render_pipelines.push(id); - } - } - - // execute resource transitions - let mut transit = device.com_allocator.extend(comb); - unsafe { - // the last buffer was open, closing now - comb.raw.last_mut().unwrap().finish(); - transit.begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT); - } - log::trace!("Stitching command buffer {:?} before submission", cmb_id); - command::CommandBuffer::insert_barriers( - &mut transit, - &mut *trackers, - &comb.trackers, - &*buffer_guard, - &*texture_guard, - ); - unsafe { - transit.finish(); - } - comb.raw.insert(0, transit); - } - - log::debug!("Device after submission {}: {:#?}", submit_index, trackers); - - // now prepare the GPU submission - let fence = device.raw.create_fence(false).unwrap(); - let submission = hal::queue::Submission { - command_buffers: command_buffer_ids - .iter() - .flat_map(|&cmb_id| 
&command_buffer_guard[cmb_id].raw), - wait_semaphores: Vec::new(), - signal_semaphores: signal_swapchain_semaphores - .into_iter() - .map(|sc_id| &swap_chain_guard[sc_id].semaphore), - }; - - unsafe { - device.queue_group.queues[0].submit(submission, Some(&fence)); - } - - (submit_index, fence) - }; - - // No need for write access to the device from here on out - let callbacks = { - let mut token = Token::root(); - let (device_guard, mut token) = hub.devices.read(&mut token); - let device = &device_guard[queue_id]; - - let callbacks = device.maintain(self, false, &mut token); - device.lock_life(&mut token).track_submission( - submit_index, - fence, - &device.temp_suspected, - ); - - // finally, return the command buffers to the allocator - for &cmb_id in command_buffer_ids { - let (cmd_buf, _) = hub.command_buffers.unregister(cmb_id, &mut token); - device.com_allocator.after_submit(cmd_buf, submit_index); - } - - callbacks - }; - - fire_map_callbacks(callbacks); - } - pub fn device_create_render_pipeline( &self, device_id: id::DeviceId, @@ -2623,6 +2462,7 @@ impl Global { id: buffer_id, data, range: sub_range.offset..sub_range.offset + size, + queued: false, }); } None => (), diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs new file mode 100644 index 0000000000..f8cdfb8eb5 --- /dev/null +++ b/wgpu-core/src/device/queue.rs @@ -0,0 +1,354 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#[cfg(feature = "trace")] +use crate::device::trace::Action; +use crate::{ + command::{CommandAllocator, CommandBuffer}, + hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token}, + id, + resource::{BufferMapState, BufferUse}, +}; + +use gfx_memory::{Block, Heaps, MemoryBlock}; +use hal::{command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _}; +use smallvec::SmallVec; +use std::{iter, sync::atomic::Ordering}; + +#[derive(Debug, Default)] +pub(crate) struct PendingWrites { + pub command_buffer: Option, + pub temp_buffers: Vec<(B::Buffer, MemoryBlock)>, +} + +impl PendingWrites { + pub fn new() -> Self { + PendingWrites { + command_buffer: None, + temp_buffers: Vec::new(), + } + } + + pub fn dispose( + self, + device: &B::Device, + com_allocator: &CommandAllocator, + mem_allocator: &mut Heaps, + ) { + if let Some(raw) = self.command_buffer { + com_allocator.discard_internal(raw); + } + for (buffer, memory) in self.temp_buffers { + mem_allocator.free(device, memory); + unsafe { + device.destroy_buffer(buffer); + } + } + } +} + +impl Global { + pub fn queue_write_buffer( + &self, + queue_id: id::QueueId, + data: &[u8], + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + ) { + let hub = B::hub(self); + let mut token = Token::root(); + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = &mut device_guard[queue_id]; + let (buffer_guard, _) = hub.buffers.read(&mut token); + + #[cfg(feature = "trace")] + match device.trace { + Some(ref trace) => { + let mut trace = trace.lock(); + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteBuffer { + id: buffer_id, + data: data_path, + range: buffer_offset..buffer_offset + data.len() as wgt::BufferAddress, + queued: true, + }); + } + None => {} + } + + let mut trackers = device.trackers.lock(); + let (dst, transition) = + trackers + .buffers + .use_replace(&*buffer_guard, buffer_id, (), BufferUse::COPY_DST); + assert!( + 
dst.usage.contains(wgt::BufferUsage::COPY_DST), + "Write buffer usage {:?} must contain usage flag COPY_DST", + dst.usage + ); + + let last_submit_index = device.life_guard.submission_index.load(Ordering::Relaxed); + dst.life_guard.use_at(last_submit_index + 1); + + let mut src_raw = unsafe { + device + .raw + .create_buffer( + data.len() as wgt::BufferAddress, + hal::buffer::Usage::TRANSFER_SRC, + ) + .unwrap() + }; + //TODO: do we need to transition into HOST_WRITE access first? + let requirements = unsafe { device.raw.get_buffer_requirements(&src_raw) }; + + let mut memory = device + .mem_allocator + .lock() + .allocate( + &device.raw, + requirements.type_mask as u32, + gfx_memory::MemoryUsage::Staging { read_back: false }, + gfx_memory::Kind::Linear, + requirements.size, + requirements.alignment, + ) + .unwrap(); + unsafe { + device + .raw + .set_buffer_name(&mut src_raw, ""); + device + .raw + .bind_buffer_memory(memory.memory(), memory.segment().offset, &mut src_raw) + .unwrap(); + } + + let mut mapped = memory.map(&device.raw, hal::memory::Segment::ALL).unwrap(); + unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) } + .unwrap() + .slice[..data.len()] + .copy_from_slice(data); + + let mut comb = match device.pending_writes.command_buffer.take() { + Some(comb) => comb, + None => { + let mut comb = device.com_allocator.allocate_internal(); + unsafe { + comb.begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT); + } + comb + } + }; + let region = hal::command::BufferCopy { + src: 0, + dst: buffer_offset, + size: data.len() as _, + }; + unsafe { + comb.pipeline_barrier( + super::all_buffer_stages()..hal::pso::PipelineStage::TRANSFER, + hal::memory::Dependencies::empty(), + iter::once(hal::memory::Barrier::Buffer { + states: hal::buffer::Access::HOST_WRITE..hal::buffer::Access::TRANSFER_READ, + target: &src_raw, + range: hal::buffer::SubRange::WHOLE, + families: None, + }) + .chain(transition.map(|pending| pending.into_hal(dst))), + ); + 
comb.copy_buffer(&src_raw, &dst.raw, iter::once(region)); + } + device.pending_writes.temp_buffers.push((src_raw, memory)); + device.pending_writes.command_buffer = Some(comb); + } + + pub fn queue_submit( + &self, + queue_id: id::QueueId, + command_buffer_ids: &[id::CommandBufferId], + ) { + let hub = B::hub(self); + + let callbacks = { + let mut token = Token::root(); + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = &mut device_guard[queue_id]; + let pending_write_command_buffer = + device + .pending_writes + .command_buffer + .take() + .map(|mut comb_raw| unsafe { + comb_raw.finish(); + comb_raw + }); + device.temp_suspected.clear(); + + let submit_index = 1 + device + .life_guard + .submission_index + .fetch_add(1, Ordering::Relaxed); + + let fence = { + let mut signal_swapchain_semaphores = SmallVec::<[_; 1]>::new(); + let (mut swap_chain_guard, mut token) = hub.swap_chains.write(&mut token); + let (mut command_buffer_guard, mut token) = hub.command_buffers.write(&mut token); + + { + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); + let (mut buffer_guard, mut token) = hub.buffers.write(&mut token); + let (texture_guard, mut token) = hub.textures.read(&mut token); + let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); + let (sampler_guard, _) = hub.samplers.read(&mut token); + + //Note: locking the trackers has to be done after the storages + let mut trackers = device.trackers.lock(); + + //TODO: if multiple command buffers are submitted, we can re-use the last + // native command buffer of the previous chain instead of always creating + // a temporary one, since the chains are not finished. 
+ + // finish all the command buffers first + for &cmb_id in command_buffer_ids { + let comb = &mut command_buffer_guard[cmb_id]; + #[cfg(feature = "trace")] + match device.trace { + Some(ref trace) => trace + .lock() + .add(Action::Submit(submit_index, comb.commands.take().unwrap())), + None => (), + }; + + if let Some((sc_id, fbo)) = comb.used_swap_chain.take() { + let sc = &mut swap_chain_guard[sc_id.value]; + assert!(sc.acquired_view_id.is_some(), + "SwapChainOutput for {:?} was dropped before the respective command buffer {:?} got submitted!", + sc_id.value, cmb_id); + if sc.acquired_framebuffers.is_empty() { + signal_swapchain_semaphores.push(sc_id.value); + } + sc.acquired_framebuffers.push(fbo); + } + + // optimize the tracked states + comb.trackers.optimize(); + + // update submission IDs + for id in comb.trackers.buffers.used() { + if let BufferMapState::Waiting(_) = buffer_guard[id].map_state { + panic!("Buffer has a pending mapping."); + } + if !buffer_guard[id].life_guard.use_at(submit_index) { + if let BufferMapState::Active { .. 
} = buffer_guard[id].map_state { + log::warn!("Dropped buffer has a pending mapping."); + super::unmap_buffer(&device.raw, &mut buffer_guard[id]); + } + device.temp_suspected.buffers.push(id); + } + } + for id in comb.trackers.textures.used() { + if !texture_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.textures.push(id); + } + } + for id in comb.trackers.views.used() { + if !texture_view_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.texture_views.push(id); + } + } + for id in comb.trackers.bind_groups.used() { + if !bind_group_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.bind_groups.push(id); + } + } + for id in comb.trackers.samplers.used() { + if !sampler_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.samplers.push(id); + } + } + for id in comb.trackers.compute_pipes.used() { + if !compute_pipe_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.compute_pipelines.push(id); + } + } + for id in comb.trackers.render_pipes.used() { + if !render_pipe_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.render_pipelines.push(id); + } + } + + // execute resource transitions + let mut transit = device.com_allocator.extend(comb); + unsafe { + // the last buffer was open, closing now + comb.raw.last_mut().unwrap().finish(); + transit + .begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT); + } + log::trace!("Stitching command buffer {:?} before submission", cmb_id); + CommandBuffer::insert_barriers( + &mut transit, + &mut *trackers, + &comb.trackers, + &*buffer_guard, + &*texture_guard, + ); + unsafe { + transit.finish(); + } + comb.raw.insert(0, transit); + } + + log::debug!("Device after submission {}: {:#?}", submit_index, trackers); + } + + // now prepare the GPU submission + let fence = device.raw.create_fence(false).unwrap(); + let submission = hal::queue::Submission { + command_buffers: 
pending_write_command_buffer.as_ref().into_iter().chain( + command_buffer_ids + .iter() + .flat_map(|&cmb_id| &command_buffer_guard[cmb_id].raw), + ), + wait_semaphores: Vec::new(), + signal_semaphores: signal_swapchain_semaphores + .into_iter() + .map(|sc_id| &swap_chain_guard[sc_id].semaphore), + }; + + unsafe { + device.queue_group.queues[0].submit(submission, Some(&fence)); + } + fence + }; + + if let Some(comb_raw) = pending_write_command_buffer { + device + .com_allocator + .after_submit_internal(comb_raw, submit_index); + } + + let callbacks = device.maintain(self, false, &mut token); + super::Device::lock_life_internal(&device.life_tracker, &mut token).track_submission( + submit_index, + fence, + &device.temp_suspected, + device.pending_writes.temp_buffers.drain(..), + ); + + // finally, return the command buffers to the allocator + for &cmb_id in command_buffer_ids { + let (cmd_buf, _) = hub.command_buffers.unregister(cmb_id, &mut token); + device.com_allocator.after_submit(cmd_buf, submit_index); + } + + callbacks + }; + + super::fire_map_callbacks(callbacks); + } +} diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 93efefbdea..b3afbe6703 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -166,6 +166,7 @@ pub enum Action { id: id::BufferId, data: FileName, range: Range, + queued: bool, }, Submit(crate::SubmissionIndex, Vec), }