From b55cd162bfeccc987b5861ce748585df21882db5 Mon Sep 17 00:00:00 2001 From: Layl <2385329-layl@users.noreply.gitlab.com> Date: Tue, 31 Dec 2019 22:52:02 +0100 Subject: [PATCH 1/2] Defer command buffer recycling until allocate --- wgpu-core/src/command/allocator.rs | 77 ++++++++++++++++-------------- wgpu-core/src/device.rs | 14 ++++-- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs index de8af9a0c5..2ed4de1221 100644 --- a/wgpu-core/src/command/allocator.rs +++ b/wgpu-core/src/command/allocator.rs @@ -22,9 +22,37 @@ use std::{collections::HashMap, sync::atomic::Ordering, thread}; struct CommandPool { raw: B::CommandPool, available: Vec, + pending: Vec>, } impl CommandPool { + fn maintain(&mut self, last_done: usize) { + for i in (0 .. self.pending.len()).rev() { + let index = self.pending[i] + .life_guard + .submission_index + .load(Ordering::Acquire); + if index <= last_done { + let cmd_buf = self.pending.swap_remove(i); + log::trace!( + "recycling comb submitted in {} when {} is done", + index, + last_done + ); + self.recycle(cmd_buf); + } + } + } + + fn recycle(&mut self, cmd_buf: CommandBuffer) { + for mut raw in cmd_buf.raw { + unsafe { + raw.reset(false); + } + self.available.push(raw); + } + } + fn allocate(&mut self) -> B::CommandBuffer { if self.available.is_empty() { let extra = unsafe { self.raw.allocate_vec(20, hal::command::Level::Primary) }; @@ -37,20 +65,9 @@ impl CommandPool { #[derive(Debug)] struct Inner { + // TODO: Currently pools from threads that are stopped or no longer call into wgpu will never be + // cleaned up. pools: HashMap>, - pending: Vec>, -} - -impl Inner { - fn recycle(&mut self, cmd_buf: CommandBuffer) { - let pool = self.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap(); - for mut raw in cmd_buf.raw { - unsafe { - raw.reset(false); - } - pool.available.push(raw); - } - } } #[derive(Debug)] @@ -65,6 +82,7 @@ impl CommandAllocator { device_id: Stored, device: &B::Device, features: Features, + last_done: usize, ) -> CommandBuffer { //debug_assert_eq!(device_id.backend(), B::VARIANT); let thread_id = thread::current().id(); @@ -79,7 +97,12 @@ impl CommandAllocator { } .unwrap(), available: Vec::new(), + pending: Vec::new(), }); + + // Recycle completed command buffers + pool.maintain(last_done); + let init = pool.allocate(); CommandBuffer { @@ -101,7 +124,6 @@ impl CommandAllocator { queue_family, inner: Mutex::new(Inner { pools: HashMap::new(), - pending: Vec::new(), }), } } @@ -124,34 +146,19 @@ impl CommandAllocator { .life_guard .submission_index .store(submit_index, Ordering::Release); - self.inner.lock().pending.push(cmd_buf); - } - pub fn maintain(&self, last_done: SubmissionIndex) { + // Record this command buffer as pending let mut inner = self.inner.lock(); - for i in (0 .. inner.pending.len()).rev() { - let index = inner.pending[i] - .life_guard - .submission_index - .load(Ordering::Acquire); - if index <= last_done { - let cmd_buf = inner.pending.swap_remove(i); - log::trace!( - "recycling comb submitted in {} when {} is done", - index, - last_done - ); - inner.recycle(cmd_buf); - } - } + let pool = inner.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap(); + pool.pending.push(cmd_buf); } pub fn destroy(self, device: &B::Device) { let mut inner = self.inner.lock(); - while let Some(cmd_buf) = inner.pending.pop() { - inner.recycle(cmd_buf); - } for (_, mut pool) in inner.pools.drain() { + while let Some(cmd_buf) = pool.pending.pop() { + pool.recycle(cmd_buf); + } unsafe { pool.raw.free(pool.available); device.destroy_command_pool(pool.raw); diff --git a/wgpu-core/src/device.rs b/wgpu-core/src/device.rs index b48fd63354..dc918d679c 100644 --- a/wgpu-core/src/device.rs +++ b/wgpu-core/src/device.rs @@ -516,6 +516,9 @@ pub struct Device { pub(crate) framebuffers: Mutex>, pending: Mutex>, pub(crate) features: Features, + /// The last submission index that is done, used to reclaim command buffers on encoder creation. + /// Because of AtomicUsize not having fetch_max stabilized, this has to be a mutex right now. + last_done: Mutex, } impl Device { @@ -575,6 +578,7 @@ impl Device { max_bind_groups, supports_texture_d24_s8, }, + last_done: Mutex::new(0), } } @@ -596,16 +600,16 @@ impl Device { &self.desc_allocator, force_wait, ); + { + let mut last_done_guard = self.last_done.lock(); + *last_done_guard = last_done_guard.max(last_done); + } let callbacks = pending.handle_mapping(global, &self.raw, token); unsafe { self.desc_allocator.lock().cleanup(&self.raw); } - if last_done != 0 { - self.com_allocator.maintain(last_done); - } - callbacks } @@ -1562,7 +1566,7 @@ impl> Global { }; let mut comb = device .com_allocator - .allocate(dev_stored, &device.raw, device.features); + .allocate(dev_stored, &device.raw, device.features, *device.last_done.lock()); unsafe { comb.raw.last_mut().unwrap().begin( hal::command::CommandBufferFlags::ONE_TIME_SUBMIT, From ac8f75288f331db600a2a4557bf63545cf4c5f12 Mon Sep 17 00:00:00 2001 From: Layl <2385329-layl@users.noreply.gitlab.com> Date: Thu, 2 Jan 2020 01:24:02 +0100 Subject: [PATCH 2/2] Use current active list instead of storing last done --- wgpu-core/src/command/allocator.rs | 10 +++++----- wgpu-core/src/device.rs | 28 ++++++++++------------------ 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs index 2ed4de1221..92ac46d27d 100644 --- a/wgpu-core/src/command/allocator.rs +++ b/wgpu-core/src/command/allocator.rs @@ -26,18 +26,18 @@ struct CommandPool { } impl CommandPool { - fn maintain(&mut self, last_done: usize) { + fn maintain(&mut self, lowest_active_index: SubmissionIndex) { for i in (0 .. self.pending.len()).rev() { let index = self.pending[i] .life_guard .submission_index .load(Ordering::Acquire); - if index <= last_done { + if index < lowest_active_index { let cmd_buf = self.pending.swap_remove(i); log::trace!( "recycling comb submitted in {} when {} is done", index, - last_done + lowest_active_index, ); self.recycle(cmd_buf); } @@ -82,7 +82,7 @@ impl CommandAllocator { device_id: Stored, device: &B::Device, features: Features, - last_done: usize, + lowest_active_index: SubmissionIndex, ) -> CommandBuffer { //debug_assert_eq!(device_id.backend(), B::VARIANT); let thread_id = thread::current().id(); @@ -101,7 +101,7 @@ impl CommandAllocator { }); // Recycle completed command buffers - pool.maintain(last_done); + pool.maintain(lowest_active_index); let init = pool.allocate(); diff --git a/wgpu-core/src/device.rs b/wgpu-core/src/device.rs index dc918d679c..062930e60c 100644 --- a/wgpu-core/src/device.rs +++ b/wgpu-core/src/device.rs @@ -200,7 +200,7 @@ impl PendingResources { heaps_mutex: &Mutex>, descriptor_allocator_mutex: &Mutex>, force_wait: bool, - ) -> SubmissionIndex { + ) { if force_wait && !self.active.is_empty() { let status = unsafe { device.wait_for_fences( @@ -219,11 +219,6 @@ impl PendingResources { .iter() .position(|a| unsafe { !device.get_fence_status(&a.fence).unwrap() }) .unwrap_or(self.active.len()); - let last_done = if done_count != 0 { - self.active[done_count - 1].index - } else { - return 0; - }; for a in self.active.drain(.. done_count) { log::trace!("Active submission {} is done", a.index); @@ -260,8 +255,6 @@ impl PendingResources { }, } } - - last_done } fn triage_referenced( @@ -516,9 +509,6 @@ pub struct Device { pub(crate) framebuffers: Mutex>, pending: Mutex>, pub(crate) features: Features, - /// The last submission index that is done, used to reclaim command buffers on encoder creation. - /// Because of AtomicUsize not having fetch_max stabilized, this has to be a mutex right now. - last_done: Mutex, } impl Device { @@ -578,7 +568,6 @@ impl Device { max_bind_groups, supports_texture_d24_s8, }, - last_done: Mutex::new(0), } } @@ -594,16 +583,12 @@ impl Device { pending.triage_referenced(global, &mut *trackers, token); pending.triage_mapped(global, token); pending.triage_framebuffers(global, &mut *self.framebuffers.lock(), token); - let last_done = pending.cleanup( + pending.cleanup( &self.raw, &self.mem_allocator, &self.desc_allocator, force_wait, ); - { - let mut last_done_guard = self.last_done.lock(); - *last_done_guard = last_done_guard.max(last_done); - } let callbacks = pending.handle_mapping(global, &self.raw, token); unsafe { @@ -1564,9 +1549,16 @@ impl> Global { value: device_id, ref_count: device.life_guard.ref_count.clone(), }; + + // The first entry in the active list should have the lowest index + let lowest_active_index = device.pending.lock() + .active.get(0) + .map(|active| active.index) + .unwrap_or(0); + let mut comb = device .com_allocator - .allocate(dev_stored, &device.raw, device.features, *device.last_done.lock()); + .allocate(dev_stored, &device.raw, device.features, lowest_active_index); unsafe { comb.raw.last_mut().unwrap().begin( hal::command::CommandBufferFlags::ONE_TIME_SUBMIT,