From 875cfb59617fa625d734f099a34396af3d9e3f89 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 18 Jan 2021 21:49:24 +0100 Subject: [PATCH 01/18] buffers are now zero initialized for mappings queue_write_buffer marks as initialized --- Cargo.lock | 1 + wgpu-core/Cargo.toml | 1 + wgpu-core/src/device/mod.rs | 33 ++++++++++++++++++++++++++++++++- wgpu-core/src/device/queue.rs | 17 ++++++++++++++++- wgpu-core/src/resource.rs | 32 +++++++++++++++++++++++++++++++- 5 files changed, 81 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90c4ab1840..3b63ac9f7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1807,6 +1807,7 @@ dependencies = [ "loom", "naga", "parking_lot", + "range-alloc", "raw-window-handle", "ron", "serde", diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 453d779c68..8bdb81c342 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -38,6 +38,7 @@ gpu-descriptor = { version = "0.1", features = ["tracing"] } hal = { package = "gfx-hal", git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8" } gfx-backend-empty = { git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8" } +range-alloc = {git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8"} [target.'cfg(all(not(target_arch = "wasm32"), all(unix, not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] gfx-backend-vulkan = { git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8", features = ["naga"] } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 1a2fcc733c..f69b63e7c4 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -196,6 +196,30 @@ fn map_buffer( }), _ => None, }; + let writes_are_synced = buffer.sync_mapped_writes.is_some() || block.is_coherent(); + + // Zero out uninitialized parts of the mapping. 
(Spec dictates all resources behave as if they were initialized with zero) + // Note that going through a `fill_buffer` command may be faster but we don't have the chance of interacting with the queue + // between map intent (map_buffer_async or immediately) and this call. + let uninitialized_ranges: Vec> = + buffer.uninitialized_ranges_in_range(Range { + start: offset, + end: offset + size, + }); + for range in uninitialized_ranges { + unsafe { + ptr::write_bytes( + ptr.as_ptr().offset(range.start as isize), + 0, + (range.end - range.start) as usize, + ) + }; + // (technically the buffer is only initialized when we're done unmapping but we know it can't be used otherwise meanwhile) + if writes_are_synced { + buffer.mark_initialized(range); + } + } + Ok(ptr) } @@ -534,6 +558,13 @@ impl Device { .allocate(&self.raw, requirements, mem_usage)?; block.bind_buffer(&self.raw, &mut buffer)?; + let mut uninitialized_ranges = range_alloc::RangeAllocator::new(Range { + start: 0, + end: desc.size, + }); + // Mark buffer as uninitialized + let _ = uninitialized_ranges.allocate_range(desc.size); + Ok(resource::Buffer { raw: Some((buffer, block)), device_id: Stored { @@ -542,7 +573,7 @@ impl Device { }, usage: desc.usage, size: desc.size, - full_range: (), + uninitialized_ranges, sync_mapped_writes: None, map_state: resource::BufferMapState::Idle, life_guard: LifeGuard::new(desc.label.borrow_or_default()), diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index dbc53f28a4..fa09be5cfc 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -191,7 +191,7 @@ impl Global { let device = device_guard .get_mut(queue_id) .map_err(|_| DeviceError::Invalid)?; - let (buffer_guard, _) = hub.buffers.read(&mut token); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { @@ -271,6 +271,21 @@ impl Global { device.pending_writes.consume(stage); 
device.pending_writes.dst_buffers.insert(buffer_id); + // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. + { + let dst = buffer_guard.get_mut(buffer_id).unwrap(); + for range in dst + .uninitialized_ranges_in_range::>>( + std::ops::Range { + start: buffer_offset, + end: buffer_offset + data_size, + }, + ) + { + dst.mark_initialized(range); + } + } + Ok(()) } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 23aab5c865..0510a3d776 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -160,12 +160,42 @@ pub struct Buffer { pub(crate) device_id: Stored, pub(crate) usage: wgt::BufferUsage, pub(crate) size: wgt::BufferAddress, - pub(crate) full_range: (), + // An allocated range in this allocator means that the range in question is NOT yet initialized. + pub(crate) uninitialized_ranges: range_alloc::RangeAllocator, pub(crate) sync_mapped_writes: Option, pub(crate) life_guard: LifeGuard, pub(crate) map_state: BufferMapState, } +impl Buffer { + pub(crate) fn uninitialized_ranges_in_range< + 'a, + R: std::iter::FromIterator>, + >( + &self, + range: Range, + ) -> R { + self.uninitialized_ranges + .allocated_ranges() + .filter_map(|r: Range| { + if r.end > range.start && r.start < range.end { + Some(Range { + start: range.start.max(r.start), + end: range.end.min(r.end), + }) + } else { + None + } + }) + .collect::() + } + + // Range must be continuous previously uninitialized section. 
+ pub(crate) fn mark_initialized(&mut self, range: Range) { + self.uninitialized_ranges.free_range(range); + } +} + #[derive(Clone, Debug, Error)] pub enum CreateBufferError { #[error(transparent)] From a51b4f92325699a4b7c1920ca86c8f71eb5a2db4 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Jan 2021 22:59:34 +0100 Subject: [PATCH 02/18] buffer zero initialization prior to use in submit --- wgpu-core/src/command/mod.rs | 30 ++++++++++++++++++++++++++++++ wgpu-core/src/device/queue.rs | 8 +++++++- wgpu-core/src/resource.rs | 7 ++++++- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index b98ce39598..03999fad5d 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -78,6 +78,36 @@ impl CommandBuffer { } } + pub(crate) fn insert_zero_initializations( + raw: &mut B::CommandBuffer, + head: &TrackerSet, + buffer_guard: &mut Storage, id::BufferId>, + // TODO: Textures also need initialization! + _texture_guard: &mut Storage, id::TextureId>, + ) { + // Make sure all used buffers are fully initialized. + // TODO This is not optimal since buffer usage may be target of a copy operation. + for id in head.buffers.used() { + let buffer = &mut buffer_guard[id]; + let uninitialized_ranges: Vec> = + buffer.uninitialized_ranges().collect(); + for uninitialized_range in uninitialized_ranges { + // TODO this itself needs resource barriers. How can we make this work? 
+ unsafe { + raw.fill_buffer( + &buffer.raw.as_ref().unwrap().0, + hal::buffer::SubRange { + offset: uninitialized_range.start, + size: Some(uninitialized_range.end - uninitialized_range.start), + }, + 0, + ); + } + buffer.mark_initialized(uninitialized_range); + } + } + } + pub(crate) fn insert_barriers( raw: &mut B::CommandBuffer, base: &mut TrackerSet, diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index fa09be5cfc..86b9a21bc6 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -505,7 +505,7 @@ impl Global { let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); let (mut buffer_guard, mut token) = hub.buffers.write(&mut token); - let (texture_guard, mut token) = hub.textures.read(&mut token); + let (mut texture_guard, mut token) = hub.textures.write(&mut token); let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); let (sampler_guard, _) = hub.samplers.read(&mut token); @@ -613,6 +613,12 @@ impl Global { .begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT); } tracing::trace!("Stitching command buffer {:?} before submission", cmb_id); + CommandBuffer::insert_zero_initializations( + &mut transit, + &cmdbuf.trackers, + &mut *buffer_guard, + &mut *texture_guard, + ); CommandBuffer::insert_barriers( &mut transit, &mut *trackers, diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 0510a3d776..ec21f9d736 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -169,7 +169,6 @@ pub struct Buffer { impl Buffer { pub(crate) fn uninitialized_ranges_in_range< - 'a, R: std::iter::FromIterator>, >( &self, @@ -190,6 +189,12 @@ impl Buffer { .collect::() } + pub(crate) fn uninitialized_ranges<'a>( + &'a self, + ) -> impl 'a + Iterator> { + self.uninitialized_ranges.allocated_ranges() + } + // Range must be continuous previously uninitialized section. 
pub(crate) fn mark_initialized(&mut self, range: Range) { self.uninitialized_ranges.free_range(range); From 076aecd1507653135272efaff7d60aa063fb1e77 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Jan 2021 09:57:55 +0100 Subject: [PATCH 03/18] buffer map permeates zero init now in any case --- wgpu-core/src/device/mod.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index f69b63e7c4..e9f9de1117 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -196,27 +196,28 @@ fn map_buffer( }), _ => None, }; - let writes_are_synced = buffer.sync_mapped_writes.is_some() || block.is_coherent(); // Zero out uninitialized parts of the mapping. (Spec dictates all resources behave as if they were initialized with zero) - // Note that going through a `fill_buffer` command may be faster but we don't have the chance of interacting with the queue - // between map intent (map_buffer_async or immediately) and this call. + // + // If this is a read mapping, ideally we would use a `fill_buffer` command before reading the data from GPU (i.e. `invalidate_range`). + // However, this would require us to kick off and wait for a command buffer or piggy back on an existing one (the later is likely the only worthwhile option). + // As reading uninitialized memory isn't a particular important path to support, + // we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region. + // + // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. + let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. 
+ let uninitialized_ranges: Vec> = buffer.uninitialized_ranges_in_range(Range { start: offset, end: offset + size, }); for range in uninitialized_ranges { - unsafe { - ptr::write_bytes( - ptr.as_ptr().offset(range.start as isize), - 0, - (range.end - range.start) as usize, - ) - }; + let size = (range.end - range.start) as usize; + unsafe { ptr::write_bytes(ptr.as_ptr().offset(range.start as isize), 0, size) }; // (technically the buffer is only initialized when we're done unmapping but we know it can't be used otherwise meanwhile) - if writes_are_synced { - buffer.mark_initialized(range); + if zero_init_needs_flush_now { + block.flush_range(raw, range.start, Some(size))?; } } From 5133b0da9492a88f3b165b6770a12dc033ff5084 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Jan 2021 11:34:28 +0100 Subject: [PATCH 04/18] Introduced separate MemoryInitTracker --- wgpu-core/src/command/mod.rs | 16 ++++--- wgpu-core/src/device/mod.rs | 43 ++++++++++-------- wgpu-core/src/device/queue.rs | 17 +++----- wgpu-core/src/lib.rs | 1 + wgpu-core/src/memory_init_tracker.rs | 65 ++++++++++++++++++++++++++++ wgpu-core/src/resource.rs | 38 +--------------- 6 files changed, 109 insertions(+), 71 deletions(-) create mode 100644 wgpu-core/src/memory_init_tracker.rs diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 03999fad5d..cbd50bd45b 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -89,21 +89,25 @@ impl CommandBuffer { // TODO This is not optimal since buffer usage may be target of a copy operation. for id in head.buffers.used() { let buffer = &mut buffer_guard[id]; - let uninitialized_ranges: Vec> = - buffer.uninitialized_ranges().collect(); - for uninitialized_range in uninitialized_ranges { + for uninitialized_segment in + buffer + .initialization_status + .drain_uninitialized_segments(hal::memory::Segment { + offset: 0, + size: None, + }) + { // TODO this itself needs resource barriers. How can we make this work? 
unsafe { raw.fill_buffer( &buffer.raw.as_ref().unwrap().0, hal::buffer::SubRange { - offset: uninitialized_range.start, - size: Some(uninitialized_range.end - uninitialized_range.start), + offset: uninitialized_segment.offset, + size: uninitialized_segment.size, }, 0, ); } - buffer.mark_initialized(uninitialized_range); } } } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index e9f9de1117..eb8fd2e883 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -8,7 +8,9 @@ use crate::{ hub::{ GfxBackend, Global, GlobalIdentityHandlerFactory, Hub, Input, InvalidId, Storage, Token, }, - id, instance, pipeline, resource, span, swap_chain, + id, instance, + memory_init_tracker::MemoryInitTracker, + pipeline, resource, span, swap_chain, track::{BufferState, TextureSelector, TextureState, TrackerSet}, validation::{self, check_buffer_usage, check_texture_usage}, FastHashMap, FastHashSet, Label, LabelHelpers, LifeGuard, MultiRefCount, PrivateFeatures, @@ -207,17 +209,27 @@ fn map_buffer( // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. 
- let uninitialized_ranges: Vec> = - buffer.uninitialized_ranges_in_range(Range { - start: offset, - end: offset + size, - }); - for range in uninitialized_ranges { - let size = (range.end - range.start) as usize; - unsafe { ptr::write_bytes(ptr.as_ptr().offset(range.start as isize), 0, size) }; - // (technically the buffer is only initialized when we're done unmapping but we know it can't be used otherwise meanwhile) + for uninitialized_segment in + buffer + .initialization_status + .drain_uninitialized_segments(hal::memory::Segment { + offset, + size: Some(size), + }) + { + unsafe { + ptr::write_bytes( + ptr.as_ptr().offset(uninitialized_segment.offset as isize), + 0, + uninitialized_segment.size.unwrap_or(size) as usize, + ) + }; if zero_init_needs_flush_now { - block.flush_range(raw, range.start, Some(size))?; + block.flush_range( + raw, + uninitialized_segment.offset, + uninitialized_segment.size, + )?; } } @@ -559,13 +571,6 @@ impl Device { .allocate(&self.raw, requirements, mem_usage)?; block.bind_buffer(&self.raw, &mut buffer)?; - let mut uninitialized_ranges = range_alloc::RangeAllocator::new(Range { - start: 0, - end: desc.size, - }); - // Mark buffer as uninitialized - let _ = uninitialized_ranges.allocate_range(desc.size); - Ok(resource::Buffer { raw: Some((buffer, block)), device_id: Stored { @@ -574,7 +579,7 @@ impl Device { }, usage: desc.usage, size: desc.size, - uninitialized_ranges, + initialization_status: MemoryInitTracker::new(desc.size), sync_mapped_writes: None, map_state: resource::BufferMapState::Idle, life_guard: LifeGuard::new(desc.label.borrow_or_default()), diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 86b9a21bc6..444bc79b6f 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -274,16 +274,13 @@ impl Global { // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. 
{ let dst = buffer_guard.get_mut(buffer_id).unwrap(); - for range in dst - .uninitialized_ranges_in_range::>>( - std::ops::Range { - start: buffer_offset, - end: buffer_offset + data_size, - }, - ) - { - dst.mark_initialized(range); - } + for _ in dst + .initialization_status + .drain_uninitialized_segments(hal::memory::Segment { + offset: buffer_offset, + size: Some(buffer_offset + data_size), + }) + {} } Ok(()) diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index bd099a0fbe..6b102ce3e9 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -36,6 +36,7 @@ pub mod device; pub mod hub; pub mod id; pub mod instance; +mod memory_init_tracker; pub mod pipeline; pub mod resource; pub mod swap_chain; diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs new file mode 100644 index 0000000000..a42dc480c8 --- /dev/null +++ b/wgpu-core/src/memory_init_tracker.rs @@ -0,0 +1,65 @@ +use std::ops::Range; + +use hal::memory::Segment; + +/// Tracks initialization status of a linear range +#[derive(Debug)] +pub(crate) struct MemoryInitTracker { + // TODO: Use a more fitting data structure! + // An allocated range in this allocator means that the range in question is NOT yet initialized. 
+ uninitialized_ranges: range_alloc::RangeAllocator, +} + +impl MemoryInitTracker { + pub(crate) fn new(size: wgt::BufferAddress) -> Self { + let mut uninitialized_ranges = + range_alloc::RangeAllocator::::new(0..size); + let _ = uninitialized_ranges.allocate_range(size); + + Self { + uninitialized_ranges, + } + } + + pub(crate) fn drain_uninitialized_segments<'a>( + &'a mut self, + segment: Segment, + ) -> impl Iterator + 'a { + let range = match segment.size { + Some(size) => segment.offset..(segment.offset + size), + None => segment.offset..self.uninitialized_ranges.initial_range().end, + }; + + let mut uninitialized_ranges: Vec> = self + .uninitialized_ranges + .allocated_ranges() + .filter_map(|r: Range| { + if r.end > range.start && r.start < range.end { + Some(Range { + start: range.start.max(r.start), + end: range.end.min(r.end), + }) + } else { + None + } + }) + .collect(); + + std::iter::from_fn(move || { + let range: Option> = + uninitialized_ranges.last().map(|r| r.clone()); + match range { + Some(range) => { + uninitialized_ranges.pop(); + let result = Some(Segment { + offset: range.start, + size: Some(range.end - range.start), + }); + self.uninitialized_ranges.free_range(range); + result + } + None => None, + } + }) + } +} diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index ec21f9d736..310bdd7a1a 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -6,6 +6,7 @@ use crate::{ device::{alloc::MemoryBlock, DeviceError, HostMap}, hub::Resource, id::{DeviceId, SwapChainId, TextureId}, + memory_init_tracker::MemoryInitTracker, track::{TextureSelector, DUMMY_SELECTOR}, validation::MissingBufferUsageError, Label, LifeGuard, RefCount, Stored, @@ -160,47 +161,12 @@ pub struct Buffer { pub(crate) device_id: Stored, pub(crate) usage: wgt::BufferUsage, pub(crate) size: wgt::BufferAddress, - // An allocated range in this allocator means that the range in question is NOT yet initialized. 
- pub(crate) uninitialized_ranges: range_alloc::RangeAllocator, + pub(crate) initialization_status: MemoryInitTracker, pub(crate) sync_mapped_writes: Option, pub(crate) life_guard: LifeGuard, pub(crate) map_state: BufferMapState, } -impl Buffer { - pub(crate) fn uninitialized_ranges_in_range< - R: std::iter::FromIterator>, - >( - &self, - range: Range, - ) -> R { - self.uninitialized_ranges - .allocated_ranges() - .filter_map(|r: Range| { - if r.end > range.start && r.start < range.end { - Some(Range { - start: range.start.max(r.start), - end: range.end.min(r.end), - }) - } else { - None - } - }) - .collect::() - } - - pub(crate) fn uninitialized_ranges<'a>( - &'a self, - ) -> impl 'a + Iterator> { - self.uninitialized_ranges.allocated_ranges() - } - - // Range must be continuous previously uninitialized section. - pub(crate) fn mark_initialized(&mut self, range: Range) { - self.uninitialized_ranges.free_range(range); - } -} - #[derive(Clone, Debug, Error)] pub enum CreateBufferError { #[error(transparent)] From 9595b39bb369a4efd2a3b0f00b75f56d215098c1 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Jan 2021 22:47:09 +0100 Subject: [PATCH 05/18] Fine grained tracking of buffer init requirements --- wgpu-core/src/binding_model.rs | 2 ++ wgpu-core/src/command/allocator.rs | 1 + wgpu-core/src/command/bundle.rs | 26 ++++++++++++++ wgpu-core/src/command/compute.rs | 15 ++++++++ wgpu-core/src/command/mod.rs | 36 ++------------------ wgpu-core/src/command/render.rs | 51 ++++++++++++++++++++++++++-- wgpu-core/src/command/transfer.rs | 49 +++++++++++++++++++++++--- wgpu-core/src/device/mod.rs | 11 +++++- wgpu-core/src/device/queue.rs | 11 +++--- wgpu-core/src/memory_init_tracker.rs | 19 +++++++++-- 10 files changed, 171 insertions(+), 50 deletions(-) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index da9f33200c..1490b3dffb 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -9,6 +9,7 @@ use crate::{ 
}, hub::Resource, id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureViewId, Valid}, + memory_init_tracker::ResourceMemoryInitTrackerAction, track::{TrackerSet, DUMMY_SELECTOR}, validation::{MissingBufferUsageError, MissingTextureUsageError}, FastHashMap, Label, LifeGuard, MultiRefCount, Stored, MAX_BIND_GROUPS, @@ -588,6 +589,7 @@ pub struct BindGroup { pub(crate) layout_id: Valid, pub(crate) life_guard: LifeGuard, pub(crate) used: TrackerSet, + pub(crate) used_buffer_ranges: Vec>, pub(crate) dynamic_binding_info: Vec, } diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs index f00c5d9eda..fb75c69c19 100644 --- a/wgpu-core/src/command/allocator.rs +++ b/wgpu-core/src/command/allocator.rs @@ -123,6 +123,7 @@ impl CommandAllocator { device_id, trackers: TrackerSet::new(B::VARIANT), used_swap_chains: Default::default(), + used_buffer_ranges: Default::default(), limits, private_features, has_labels: label.is_some(), diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index dcdb5faa30..8d15b09ae7 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -49,6 +49,7 @@ use crate::{ }, hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token}, id, + memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, resource::BufferUse, span, track::{TrackerSet, UsageConflict}, @@ -159,6 +160,7 @@ impl RenderBundleEncoder { let mut commands = Vec::new(); let mut base = self.base.as_ref(); let mut pipeline_layout_id = None::>; + let mut used_buffer_ranges = Vec::new(); for &command in base.commands { match command { @@ -204,6 +206,9 @@ impl RenderBundleEncoder { .map_pass_err(scope); } + used_buffer_ranges + .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); + state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets); state .trackers @@ -262,6 +267,10 @@ impl RenderBundleEncoder { Some(s) => offset + s.get(), 
None => buffer.size, }; + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end), + }); state.index.set_format(index_format); state.index.set_buffer(buffer_id, offset..end); } @@ -284,6 +293,10 @@ impl RenderBundleEncoder { Some(s) => offset + s.get(), None => buffer.size, }; + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end), + }); state.vertex[slot as usize].set_buffer(buffer_id, offset..end); } RenderCommand::SetPushConstant { @@ -396,6 +409,11 @@ impl RenderBundleEncoder { check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) .map_pass_err(scope)?; + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory(0..buffer.size), + }); + commands.extend(state.flush_vertices()); commands.extend(state.flush_binds()); commands.push(command); @@ -420,6 +438,12 @@ impl RenderBundleEncoder { check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) .map_pass_err(scope)?; + let stride = 4 * 4; // 4 integers, vertexCount, instanceCount, firstVertex, firstInstance + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory(0..stride), + }); + commands.extend(state.index.flush()); commands.extend(state.flush_vertices()); commands.extend(state.flush_binds()); @@ -454,6 +478,7 @@ impl RenderBundleEncoder { ref_count: device.life_guard.add_ref(), }, used: state.trackers, + used_buffer_ranges, context: self.context, life_guard: LifeGuard::new(desc.label.borrow_or_default()), }) @@ -502,6 +527,7 @@ pub struct RenderBundle { base: BasePass, pub(crate) device_id: Stored, pub(crate) used: TrackerSet, + pub(crate) used_buffer_ranges: Vec>, pub(crate) context: RenderPassContext, pub(crate) life_guard: LifeGuard, } diff --git 
a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index ba19a2f0fc..d5f41e1640 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -11,6 +11,7 @@ use crate::{ }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, + memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, resource::{Buffer, BufferUse, Texture}, span, track::{TrackerSet, UsageConflict}, @@ -329,6 +330,10 @@ impl Global { .validate_dynamic_bindings(&temp_offsets) .map_pass_err(scope)?; + cmd_buf + .used_buffer_ranges + .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); + if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( index as usize, id::Valid(bind_group_id), @@ -513,6 +518,16 @@ impl Global { .ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) .map_pass_err(scope)?; + let stride = 3 * 4; // 3 integers, x/y/z group size + cmd_buf + .used_buffer_ranges + .push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + offset..(offset + stride), + ), + }); + state .flush_states( raw, diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index cbd50bd45b..88c3df314c 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -24,6 +24,7 @@ use crate::{ device::{all_buffer_stages, all_image_stages}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, + memory_init_tracker::ResourceMemoryInitTrackerAction, resource::{Buffer, Texture}, span, track::TrackerSet, @@ -46,6 +47,7 @@ pub struct CommandBuffer { pub(crate) device_id: Stored, pub(crate) trackers: TrackerSet, pub(crate) used_swap_chains: SmallVec<[Stored; 1]>, + pub(crate) used_buffer_ranges: Vec>, limits: wgt::Limits, private_features: PrivateFeatures, has_labels: bool, @@ -78,40 +80,6 @@ impl CommandBuffer { } } - pub(crate) fn insert_zero_initializations( - raw: &mut 
B::CommandBuffer, - head: &TrackerSet, - buffer_guard: &mut Storage, id::BufferId>, - // TODO: Textures also need initialization! - _texture_guard: &mut Storage, id::TextureId>, - ) { - // Make sure all used buffers are fully initialized. - // TODO This is not optimal since buffer usage may be target of a copy operation. - for id in head.buffers.used() { - let buffer = &mut buffer_guard[id]; - for uninitialized_segment in - buffer - .initialization_status - .drain_uninitialized_segments(hal::memory::Segment { - offset: 0, - size: None, - }) - { - // TODO this itself needs resource barriers. How can we make this work? - unsafe { - raw.fill_buffer( - &buffer.raw.as_ref().unwrap().0, - hal::buffer::SubRange { - offset: uninitialized_segment.offset, - size: uninitialized_segment.size, - }, - 0, - ); - } - } - } - } - pub(crate) fn insert_barriers( raw: &mut B::CommandBuffer, base: &mut TrackerSet, diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index cf4292e7ec..05ae085d7d 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -17,6 +17,7 @@ use crate::{ }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, + memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, pipeline::PipelineFlags, resource::{BufferUse, Texture, TextureUse, TextureView, TextureViewInner}, span, @@ -1024,7 +1025,7 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); - let (cmd_buf_raw, trackers, used_swapchain, query_reset_state) = { + let (cmd_buf_raw, trackers, used_swapchain, used_buffer_ranges, query_reset_state) = { // read-only lock guard let (cmb_guard, mut token) = hub.command_buffers.read(&mut token); @@ -1079,6 +1080,7 @@ impl Global { let mut string_offset = 0; let mut active_query = None; let mut query_reset_state = QueryResetMap::new(); + let mut used_buffer_ranges = Vec::new(); for command in base.commands { match *command { @@ -1117,6 +1119,9 @@ 
impl Global { .merge_extend(&bind_group.used) .map_pass_err(scope)?; + used_buffer_ranges + .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); + if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( index as usize, id::Valid(bind_group_id), @@ -1293,6 +1298,11 @@ impl Global { state.index.format = Some(index_format); state.index.update_limit(); + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end), + }); + let range = hal::buffer::SubRange { offset, size: Some(end - offset), @@ -1336,6 +1346,13 @@ impl Global { }; vertex_state.bound = true; + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + offset..(offset + vertex_state.total_size), + ), + }); + let range = hal::buffer::SubRange { offset, size: size.map(|s| s.get()), @@ -1588,6 +1605,13 @@ impl Global { .map_pass_err(scope); } + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + offset..end_offset, + ), + }); + match indexed { false => unsafe { raw.draw_indirect( @@ -1683,6 +1707,19 @@ impl Global { .map_pass_err(scope); } + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + offset..end_offset, + ), + }); + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: count_buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + count_buffer_offset..end_count_offset, + ), + }); + match indexed { false => unsafe { raw.draw_indirect_count( @@ -1815,6 +1852,9 @@ impl Global { .map_err(RenderPassErrorInner::IncompatibleRenderBundle) .map_pass_err(scope)?; + used_buffer_ranges + .extend(bundle.used_buffer_ranges.iter().map(|x| x.clone())); + unsafe { bundle.execute( &mut raw, @@ -1845,7 +1885,13 @@ impl Global { } let 
(trackers, used_swapchain) = info.finish(&*texture_guard).map_pass_err(scope)?; - (raw, trackers, used_swapchain, query_reset_state) + ( + raw, + trackers, + used_swapchain, + used_buffer_ranges, + query_reset_state, + ) }; let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); @@ -1856,6 +1902,7 @@ impl Global { CommandBuffer::get_encoder_mut(&mut *cmb_guard, encoder_id).map_pass_err(scope)?; cmd_buf.has_labels |= base.label.is_some(); cmd_buf.used_swap_chains.extend(used_swapchain); + cmd_buf.used_buffer_ranges.extend(used_buffer_ranges); #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index e19fcd3a87..c9db824266 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -10,6 +10,7 @@ use crate::{ device::{all_buffer_stages, all_image_stages}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id::{BufferId, CommandEncoderId, TextureId}, + memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, resource::{BufferUse, Texture, TextureErrorDimension, TextureUse}, span, track::TextureSelector, @@ -149,6 +150,7 @@ pub(crate) fn texture_copy_view_to_hal( } /// Function copied with minor modifications from webgpu standard https://gpuweb.github.io/gpuweb/#valid-texture-copy-range +/// If successful, returns number of buffer bytes required for this copy. 
pub(crate) fn validate_linear_texture_data( layout: &wgt::TextureDataLayout, format: wgt::TextureFormat, @@ -156,7 +158,7 @@ pub(crate) fn validate_linear_texture_data( buffer_side: CopySide, bytes_per_block: BufferAddress, copy_size: &Extent3d, -) -> Result<(), TransferError> { +) -> Result { // Convert all inputs to BufferAddress (u64) to prevent overflow issues let copy_width = copy_size.width as BufferAddress; let copy_height = copy_size.height as BufferAddress; @@ -217,7 +219,7 @@ pub(crate) fn validate_linear_texture_data( if copy_depth > 1 && rows_per_image == 0 { return Err(TransferError::InvalidRowsPerImage); } - Ok(()) + Ok(required_bytes_in_copy) } /// Function copied with minor modifications from webgpu standard https://gpuweb.github.io/gpuweb/#valid-texture-copy-range @@ -401,6 +403,24 @@ impl Global { return Ok(()); } + // Make sure source is initialized memory and mark dest as initialized. + cmd_buf + .used_buffer_ranges + .push(ResourceMemoryInitTrackerAction { + id: destination, + action: MemoryInitTrackerAction::ImplicitlyInitialized( + destination_offset..(destination_offset + size), + ), + }); + cmd_buf + .used_buffer_ranges + .push(ResourceMemoryInitTrackerAction { + id: source, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + source_offset..(source_offset + size), + ), + }); + let region = hal::command::BufferCopy { src: source_offset, dst: destination_offset, @@ -505,7 +525,7 @@ impl Global { CopySide::Destination, copy_size, )?; - validate_linear_texture_data( + let required_buffer_bytes_in_copy = validate_linear_texture_data( &source.layout, dst_texture.format, src_buffer.size, @@ -514,6 +534,16 @@ impl Global { copy_size, )?; + cmd_buf + .used_buffer_ranges + .push(ResourceMemoryInitTrackerAction { + id: source.buffer, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), + ), + }); + // TODO: Mark dest texture memory as implicitly initialized 
here. + let (block_width, _) = dst_texture.format.describe().block_dimensions; if !conv::is_valid_copy_dst_texture_format(dst_texture.format) { Err(TransferError::CopyToForbiddenTextureFormat( @@ -645,7 +675,7 @@ impl Global { CopySide::Source, copy_size, )?; - validate_linear_texture_data( + let required_buffer_bytes_in_copy = validate_linear_texture_data( &destination.layout, src_texture.format, dst_buffer.size, @@ -661,6 +691,17 @@ impl Global { ))? } + cmd_buf + .used_buffer_ranges + .push(ResourceMemoryInitTrackerAction { + id: destination.buffer, + action: MemoryInitTrackerAction::ImplicitlyInitialized( + destination.layout.offset + ..(destination.layout.offset + required_buffer_bytes_in_copy), + ), + }); + // TODO: Mark dest texture memory as required to be initialized here. + // WebGPU uses the physical size of the texture for copies whereas vulkan uses // the virtual size. We have passed validation, so it's safe to use the // image extent data directly. We want the provided copy size to be no larger than diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index eb8fd2e883..7047c6d4e1 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -9,7 +9,7 @@ use crate::{ GfxBackend, Global, GlobalIdentityHandlerFactory, Hub, Input, InvalidId, Storage, Token, }, id, instance, - memory_init_tracker::MemoryInitTracker, + memory_init_tracker::*, pipeline, resource, span, swap_chain, track::{BufferState, TextureSelector, TextureState, TrackerSet}, validation::{self, check_buffer_usage, check_texture_usage}, @@ -1274,6 +1274,7 @@ impl Device { // `BTreeMap` has ordered bindings as keys, which allows us to coalesce // the descriptor writes into a single transaction. 
let mut write_map = BTreeMap::new(); + let mut used_buffer_ranges = Vec::new(); for entry in desc.entries.iter() { let binding = entry.binding; // Find the corresponding declaration in the layout @@ -1366,6 +1367,13 @@ impl Device { return Err(Error::BindingZeroSize(bb.buffer_id)); } + used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + id: bb.buffer_id, + action: MemoryInitTrackerAction::NeedsInitializedMemory( + bb.offset..(bb.offset + bind_size), + ), + }); + let sub_range = hal::buffer::SubRange { offset: bb.offset, size: Some(bind_size), @@ -1581,6 +1589,7 @@ impl Device { layout_id: id::Valid(desc.layout), life_guard: LifeGuard::new(desc.label.borrow_or_default()), used, + used_buffer_ranges, dynamic_binding_info, }) } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 444bc79b6f..44e713967a 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -502,7 +502,7 @@ impl Global { let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); let (mut buffer_guard, mut token) = hub.buffers.write(&mut token); - let (mut texture_guard, mut token) = hub.textures.write(&mut token); + let (texture_guard, mut token) = hub.textures.write(&mut token); let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); let (sampler_guard, _) = hub.samplers.read(&mut token); @@ -601,6 +601,9 @@ impl Global { } } + // TODO + // go through memory_init_tracker of every buffer and gather which areas need to be cleared. 
+ // execute resource transitions let mut transit = device.cmd_allocator.extend(cmdbuf); unsafe { @@ -610,12 +613,6 @@ impl Global { .begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT); } tracing::trace!("Stitching command buffer {:?} before submission", cmb_id); - CommandBuffer::insert_zero_initializations( - &mut transit, - &cmdbuf.trackers, - &mut *buffer_guard, - &mut *texture_guard, - ); CommandBuffer::insert_barriers( &mut transit, &mut *trackers, diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index a42dc480c8..551621e917 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -1,8 +1,21 @@ +use hal::memory::Segment; use std::ops::Range; -use hal::memory::Segment; +#[derive(Debug, Clone)] +pub(crate) enum MemoryInitTrackerAction { + // The memory range is going to be written by an already initialized source, thus doesn't need extra attention. + ImplicitlyInitialized(Range), + // The memory range is going to be read, therefore needs to ensure prior initialization. + NeedsInitializedMemory(Range), +} -/// Tracks initialization status of a linear range +#[derive(Debug, Clone)] +pub(crate) struct ResourceMemoryInitTrackerAction { + pub(crate) id: ResourceId, + pub(crate) action: MemoryInitTrackerAction, +} + +/// Tracks initialization status of a linear range from 0..size #[derive(Debug)] pub(crate) struct MemoryInitTracker { // TODO: Use a more fitting data structure! 
@@ -63,3 +76,5 @@ impl MemoryInitTracker { }) } } + +// TODO: Add some unit tests for this construct From 1efdc2b22925ee373aa89db33e5c091ebc5bf0dd Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 24 Jan 2021 16:04:07 +0100 Subject: [PATCH 06/18] Queue_submit zeros out uninitialized buffer regions now again but with fine grained scheme now --- wgpu-core/src/command/bundle.rs | 14 ++-- wgpu-core/src/command/compute.rs | 7 +- wgpu-core/src/command/render.rs | 25 +++---- wgpu-core/src/command/transfer.rs | 24 +++--- wgpu-core/src/device/mod.rs | 45 ++++++----- wgpu-core/src/device/queue.rs | 108 ++++++++++++++++++++++++--- wgpu-core/src/memory_init_tracker.rs | 37 +++++---- 7 files changed, 168 insertions(+), 92 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 8d15b09ae7..357da0ab75 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -49,7 +49,7 @@ use crate::{ }, hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token}, id, - memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, resource::BufferUse, span, track::{TrackerSet, UsageConflict}, @@ -269,7 +269,8 @@ impl RenderBundleEncoder { }; used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end), + range: offset..end, + kind: MemoryInitKind::NeedsInitializedMemory, }); state.index.set_format(index_format); state.index.set_buffer(buffer_id, offset..end); @@ -295,7 +296,8 @@ impl RenderBundleEncoder { }; used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end), + range: offset..end, + kind: MemoryInitKind::NeedsInitializedMemory, }); state.vertex[slot as usize].set_buffer(buffer_id, offset..end); } @@ -411,7 +413,8 @@ impl 
RenderBundleEncoder { used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory(0..buffer.size), + range: 0..buffer.size, + kind: MemoryInitKind::NeedsInitializedMemory, }); commands.extend(state.flush_vertices()); @@ -441,7 +444,8 @@ impl RenderBundleEncoder { let stride = 4 * 4; // 4 integers, vertexCount, instanceCount, firstVertex, firstInstance used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory(0..stride), + range: 0..stride, + kind: MemoryInitKind::NeedsInitializedMemory, }); commands.extend(state.index.flush()); diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index d5f41e1640..f9119fedce 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -11,7 +11,7 @@ use crate::{ }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, - memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, resource::{Buffer, BufferUse, Texture}, span, track::{TrackerSet, UsageConflict}, @@ -523,9 +523,8 @@ impl Global { .used_buffer_ranges .push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - offset..(offset + stride), - ), + range: offset..(offset + stride), + kind: MemoryInitKind::NeedsInitializedMemory, }); state diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 05ae085d7d..bf54a24a59 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -17,7 +17,7 @@ use crate::{ }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, - memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, 
pipeline::PipelineFlags, resource::{BufferUse, Texture, TextureUse, TextureView, TextureViewInner}, span, @@ -1300,7 +1300,8 @@ impl Global { used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end), + range: offset..end, + kind: MemoryInitKind::NeedsInitializedMemory, }); let range = hal::buffer::SubRange { @@ -1348,9 +1349,8 @@ impl Global { used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - offset..(offset + vertex_state.total_size), - ), + range: offset..(offset + vertex_state.total_size), + kind: MemoryInitKind::NeedsInitializedMemory, }); let range = hal::buffer::SubRange { @@ -1607,9 +1607,8 @@ impl Global { used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - offset..end_offset, - ), + range: offset..end_offset, + kind: MemoryInitKind::NeedsInitializedMemory, }); match indexed { @@ -1709,15 +1708,13 @@ impl Global { used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - offset..end_offset, - ), + range: offset..end_offset, + kind: MemoryInitKind::NeedsInitializedMemory, }); used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: count_buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - count_buffer_offset..end_count_offset, - ), + range: count_buffer_offset..end_count_offset, + kind: MemoryInitKind::NeedsInitializedMemory, }); match indexed { diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index c9db824266..2f59ca37d3 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -10,7 +10,7 @@ use crate::{ device::{all_buffer_stages, all_image_stages}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id::{BufferId, 
CommandEncoderId, TextureId}, - memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, resource::{BufferUse, Texture, TextureErrorDimension, TextureUse}, span, track::TextureSelector, @@ -408,17 +408,15 @@ impl Global { .used_buffer_ranges .push(ResourceMemoryInitTrackerAction { id: destination, - action: MemoryInitTrackerAction::ImplicitlyInitialized( - destination_offset..(destination_offset + size), - ), + range: destination_offset..(destination_offset + size), + kind: MemoryInitKind::ImplicitlyInitialized, }); cmd_buf .used_buffer_ranges .push(ResourceMemoryInitTrackerAction { id: source, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - source_offset..(source_offset + size), - ), + range: source_offset..(source_offset + size), + kind: MemoryInitKind::NeedsInitializedMemory, }); let region = hal::command::BufferCopy { @@ -538,9 +536,8 @@ impl Global { .used_buffer_ranges .push(ResourceMemoryInitTrackerAction { id: source.buffer, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), - ), + range: source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), + kind: MemoryInitKind::NeedsInitializedMemory, }); // TODO: Mark dest texture memory as implicitly initialized here. @@ -695,10 +692,9 @@ impl Global { .used_buffer_ranges .push(ResourceMemoryInitTrackerAction { id: destination.buffer, - action: MemoryInitTrackerAction::ImplicitlyInitialized( - destination.layout.offset - ..(destination.layout.offset + required_buffer_bytes_in_copy), - ), + range: destination.layout.offset + ..(destination.layout.offset + required_buffer_bytes_in_copy), + kind: MemoryInitKind::ImplicitlyInitialized, }); // TODO: Mark dest texture memory as required to be initialized here. 
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 7047c6d4e1..4d6ec533fc 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -209,27 +209,22 @@ fn map_buffer( // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. - for uninitialized_segment in - buffer - .initialization_status - .drain_uninitialized_segments(hal::memory::Segment { - offset, - size: Some(size), - }) + if let Some(uninitialized_ranges) = buffer + .initialization_status + .drain_uninitialized_ranges(offset..(size + offset)) { - unsafe { - ptr::write_bytes( - ptr.as_ptr().offset(uninitialized_segment.offset as isize), - 0, - uninitialized_segment.size.unwrap_or(size) as usize, - ) - }; - if zero_init_needs_flush_now { - block.flush_range( - raw, - uninitialized_segment.offset, - uninitialized_segment.size, - )?; + for uninitialized_range in uninitialized_ranges { + let num_bytes = uninitialized_range.end - uninitialized_range.start; + unsafe { + ptr::write_bytes( + ptr.as_ptr().offset(uninitialized_range.start as isize), + 0, + num_bytes as usize, + ) + }; + if zero_init_needs_flush_now { + block.flush_range(raw, uninitialized_range.start, Some(num_bytes))?; + } } } @@ -507,6 +502,11 @@ impl Device { // we are going to be copying into it, internally usage |= hal::buffer::Usage::TRANSFER_DST; } + } else { + // We are required to zero out (initialize) all memory. + // This can be done via mapping, transfer or with a shader. However, right now we only implement it via transfer! + // TODO: Investigate if we could also do init via mapping or shader so that we need to alter the requested usage in less cases. 
+ usage |= hal::buffer::Usage::TRANSFER_DST; } if desc.usage.is_empty() { @@ -1369,9 +1369,8 @@ impl Device { used_buffer_ranges.push(ResourceMemoryInitTrackerAction { id: bb.buffer_id, - action: MemoryInitTrackerAction::NeedsInitializedMemory( - bb.offset..(bb.offset + bind_size), - ), + range: bb.offset..(bb.offset + bind_size), + kind: MemoryInitKind::NeedsInitializedMemory, }); let sub_range = hal::buffer::SubRange { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 44e713967a..b5303ab16f 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -13,13 +13,14 @@ use crate::{ device::{alloc, DeviceError, WaitIdleError}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token}, id, + memory_init_tracker::MemoryInitKind, resource::{BufferAccessError, BufferMapState, BufferUse, TextureUse}, - span, FastHashSet, + span, FastHashMap, FastHashSet, }; use hal::{command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _}; use smallvec::SmallVec; -use std::{iter, ptr}; +use std::{iter, ops::Range, ptr}; use thiserror::Error; struct StagingData { @@ -274,13 +275,12 @@ impl Global { // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. { let dst = buffer_guard.get_mut(buffer_id).unwrap(); - for _ in dst + if let Some(uninitialized_ranges) = dst .initialization_status - .drain_uninitialized_segments(hal::memory::Segment { - offset: buffer_offset, - size: Some(buffer_offset + data_size), - }) - {} + .drain_uninitialized_ranges(buffer_offset..(buffer_offset + data_size)) + { + uninitialized_ranges.for_each(drop); + } } Ok(()) @@ -478,9 +478,96 @@ impl Global { span!(_guard, INFO, "Queue::submit"); let hub = B::hub(self); + let mut token = Token::root(); + + // Insert zero initializations if necessary. 
+ // Since we push those to the pending_writes buffer, this needs to happen before any other processing + if !command_buffer_ids.is_empty() { + // Note that in the vast majority of cases this remains empty! + let required_buffer_inits = { + let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + + let mut required_buffer_inits: FastHashMap< + id::BufferId, + Vec>, + > = FastHashMap::default(); + + for &cmb_id in command_buffer_ids { + let cmdbuf = command_buffer_guard + .get(cmb_id) + .map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?; + + for buffer_use in cmdbuf.used_buffer_ranges.iter() { + let buffer = buffer_guard + .get_mut(buffer_use.id) + .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?; + + if let Some(uninitialized_ranges) = buffer + .initialization_status + .drain_uninitialized_ranges(buffer_use.range.clone()) + { + match buffer_use.kind { + MemoryInitKind::ImplicitlyInitialized => { + uninitialized_ranges.for_each(drop); + } + MemoryInitKind::NeedsInitializedMemory => { + required_buffer_inits + .entry(buffer_use.id) + .or_default() + .extend(uninitialized_ranges); + } + } + } + } + } + required_buffer_inits + }; + + if !required_buffer_inits.is_empty() { + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + device + .pending_writes + .dst_buffers + .extend(required_buffer_inits.keys()); + let pending_writes_cmd_buf = device.borrow_pending_writes(); + + for (buffer_id, ranges) in required_buffer_inits { + let buffer_raw = &buffer_guard + .get(buffer_id) + .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_id))? + .raw + .as_ref() + .ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))? + .0; + + // TODO: Currently, we implement zero init only via fill_buffer. 
Should we implement this also via map & shader/storage? + // Note that this requires TRANSFER usage on the buffer - see also device.create_buffer + + // TODO: Collapse ranges + for range in ranges { + // TODO SHIPSTOPPER: Insert necessary barriers!! + unsafe { + pending_writes_cmd_buf.fill_buffer( + buffer_raw, + hal::buffer::SubRange { + offset: range.start, + size: Some(range.end - range.start), + }, + 0, + ); + } + } + } + } + } let callbacks = { - let mut token = Token::root(); let (mut device_guard, mut token) = hub.devices.write(&mut token); let device = device_guard .get_mut(queue_id) @@ -601,9 +688,6 @@ impl Global { } } - // TODO - // go through memory_init_tracker of every buffer and gather which areas need to be cleared. - // execute resource transitions let mut transit = device.cmd_allocator.extend(cmdbuf); unsafe { diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index 551621e917..edc510fef8 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -1,18 +1,18 @@ -use hal::memory::Segment; use std::ops::Range; #[derive(Debug, Clone)] -pub(crate) enum MemoryInitTrackerAction { +pub(crate) enum MemoryInitKind { // The memory range is going to be written by an already initialized source, thus doesn't need extra attention. - ImplicitlyInitialized(Range), + ImplicitlyInitialized, // The memory range is going to be read, therefore needs to ensure prior initialization. 
- NeedsInitializedMemory(Range), + NeedsInitializedMemory, } #[derive(Debug, Clone)] pub(crate) struct ResourceMemoryInitTrackerAction { pub(crate) id: ResourceId, - pub(crate) action: MemoryInitTrackerAction, + pub(crate) range: Range, + pub(crate) kind: MemoryInitKind, } /// Tracks initialization status of a linear range from 0..size @@ -34,15 +34,11 @@ impl MemoryInitTracker { } } - pub(crate) fn drain_uninitialized_segments<'a>( + #[must_use] + pub(crate) fn drain_uninitialized_ranges<'a>( &'a mut self, - segment: Segment, - ) -> impl Iterator + 'a { - let range = match segment.size { - Some(size) => segment.offset..(segment.offset + size), - None => segment.offset..self.uninitialized_ranges.initial_range().end, - }; - + range: Range, + ) -> Option> + 'a> { let mut uninitialized_ranges: Vec> = self .uninitialized_ranges .allocated_ranges() @@ -58,22 +54,23 @@ impl MemoryInitTracker { }) .collect(); - std::iter::from_fn(move || { + if uninitialized_ranges.is_empty() { + return None; + } + + Some(std::iter::from_fn(move || { let range: Option> = uninitialized_ranges.last().map(|r| r.clone()); match range { Some(range) => { uninitialized_ranges.pop(); - let result = Some(Segment { - offset: range.start, - size: Some(range.end - range.start), - }); + let result = range.clone(); self.uninitialized_ranges.free_range(range); - result + Some(result) } None => None, } - }) + })) } } From 5d2afa52c190eba06068cc6a8f01f8b3ed86b896 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 24 Jan 2021 16:15:04 +0100 Subject: [PATCH 07/18] Fix zero init for buffers mapped at cration that are not mappable --- wgpu-core/src/device/mod.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 4d6ec533fc..002497dde0 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -2575,7 +2575,7 @@ impl Global { usage: wgt::BufferUsage::MAP_WRITE | wgt::BufferUsage::COPY_SRC, 
mapped_at_creation: false, }; - let stage = match device.create_buffer(device_id, &stage_desc, true) { + let mut stage = match device.create_buffer(device_id, &stage_desc, true) { Ok(stage) => stage, Err(e) => { let (raw, memory) = buffer.raw.unwrap(); @@ -2606,6 +2606,21 @@ impl Global { break e.into(); } }; + + // Zero initialize memory and then mark both staging and buffer as initialized + // (it's guaranteed that this is the case by the time the buffer is usable) + unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) }; + buffer + .initialization_status + .drain_uninitialized_ranges(0..buffer.size) + .unwrap() + .for_each(drop); + stage + .initialization_status + .drain_uninitialized_ranges(0..buffer.size) + .unwrap() + .for_each(drop); + buffer.map_state = resource::BufferMapState::Init { ptr, needs_flush: !stage_memory.is_coherent(), From 22fb9ab1eb7bc441e318d201fe4f209e8022198a Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 25 Jan 2021 00:06:45 +0100 Subject: [PATCH 08/18] buffer zero init on queue_submit emits correct barriers now refactored this zero init into separate method --- wgpu-core/src/device/mod.rs | 3 +- wgpu-core/src/device/queue.rs | 205 +++++++++++++++++++--------------- 2 files changed, 119 insertions(+), 89 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 002497dde0..fb61d4b823 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -504,8 +504,7 @@ impl Device { } } else { // We are required to zero out (initialize) all memory. - // This can be done via mapping, transfer or with a shader. However, right now we only implement it via transfer! - // TODO: Investigate if we could also do init via mapping or shader so that we need to alter the requested usage in less cases. + // This is done on demand using fill_buffer which requires write transfer usage! 
usage |= hal::buffer::Usage::TRANSFER_DST; } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index b5303ab16f..e454ee3d80 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -470,6 +470,122 @@ impl Global { Ok(()) } + // Enacts all zero initializations required by the given command buffers + // Required commands are appended to device.pending_writes + fn initialize_used_uninitialized_memory( + &self, + queue_id: id::QueueId, + command_buffer_ids: &[id::CommandBufferId], + ) -> Result<(), QueueSubmitError> { + if command_buffer_ids.is_empty() { + return Ok(()); + } + + let hub = B::hub(self); + let mut token = Token::root(); + + let mut required_buffer_inits = { + let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + + let mut required_buffer_inits: FastHashMap< + id::BufferId, + Vec>, + > = FastHashMap::default(); + + for &cmb_id in command_buffer_ids { + let cmdbuf = command_buffer_guard + .get(cmb_id) + .map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?; + + for buffer_use in cmdbuf.used_buffer_ranges.iter() { + let buffer = buffer_guard + .get_mut(buffer_use.id) + .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?; + + if let Some(uninitialized_ranges) = buffer + .initialization_status + .drain_uninitialized_ranges(buffer_use.range.clone()) + { + match buffer_use.kind { + MemoryInitKind::ImplicitlyInitialized => { + uninitialized_ranges.for_each(drop); + } + MemoryInitKind::NeedsInitializedMemory => { + required_buffer_inits + .entry(buffer_use.id) + .or_default() + .extend(uninitialized_ranges); + } + } + } + } + } + required_buffer_inits + }; + + // Memory init is expected to be rare (means user relies on default zero!), so most of the time we early here! 
+ if required_buffer_inits.is_empty() { + return Ok(()); + } + + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + device + .pending_writes + .dst_buffers + .extend(required_buffer_inits.keys()); + device.borrow_pending_writes(); // Call ensures there is a pending_writes cmdbuffer, but using the reference returned would make the borrow checker unhappy! + let pending_writes_cmd_buf = device.pending_writes.command_buffer.as_mut().unwrap(); + let mut trackers = device.trackers.lock(); + + for (buffer_id, mut ranges) in required_buffer_inits.drain() { + // Collapse touching ranges. We can't do this any earlier since we only now gathered ranges from several different command buffers! + ranges.sort_by(|a, b| a.start.cmp(&b.start)); + for i in (1..ranges.len()).rev() { + assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this! 
+ if ranges[i].start == ranges[i - 1].end { + ranges[i - 1].end = ranges[i].end; + ranges.remove(i); + } + } + + let (buffer, transition) = trackers + .buffers + .use_replace(&*buffer_guard, buffer_id, (), BufferUse::COPY_DST) + .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_id))?; + let &(ref buffer_raw, _) = buffer + .raw + .as_ref() + .ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))?; + unsafe { + pending_writes_cmd_buf.pipeline_barrier( + super::all_buffer_stages()..hal::pso::PipelineStage::TRANSFER, + hal::memory::Dependencies::empty(), + transition.map(|pending| pending.into_hal(buffer)), + ); + } + for range in ranges { + unsafe { + pending_writes_cmd_buf.fill_buffer( + buffer_raw, + hal::buffer::SubRange { + offset: range.start, + size: Some(range.end - range.start), + }, + 0, + ); + } + } + } + + Ok(()) + } + pub fn queue_submit( &self, queue_id: id::QueueId, @@ -477,96 +593,11 @@ impl Global { ) -> Result<(), QueueSubmitError> { span!(_guard, INFO, "Queue::submit"); + self.initialize_used_uninitialized_memory::(queue_id, command_buffer_ids)?; + let hub = B::hub(self); let mut token = Token::root(); - // Insert zero initializations if necessary. - // Since we push those to the pending_writes buffer, this needs to happen before any other processing - if !command_buffer_ids.is_empty() { - // Note that in the vast majority of cases this remains empty! 
- let required_buffer_inits = { - let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token); - let (mut buffer_guard, _) = hub.buffers.write(&mut token); - - let mut required_buffer_inits: FastHashMap< - id::BufferId, - Vec>, - > = FastHashMap::default(); - - for &cmb_id in command_buffer_ids { - let cmdbuf = command_buffer_guard - .get(cmb_id) - .map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?; - - for buffer_use in cmdbuf.used_buffer_ranges.iter() { - let buffer = buffer_guard - .get_mut(buffer_use.id) - .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?; - - if let Some(uninitialized_ranges) = buffer - .initialization_status - .drain_uninitialized_ranges(buffer_use.range.clone()) - { - match buffer_use.kind { - MemoryInitKind::ImplicitlyInitialized => { - uninitialized_ranges.for_each(drop); - } - MemoryInitKind::NeedsInitializedMemory => { - required_buffer_inits - .entry(buffer_use.id) - .or_default() - .extend(uninitialized_ranges); - } - } - } - } - } - required_buffer_inits - }; - - if !required_buffer_inits.is_empty() { - let (mut device_guard, mut token) = hub.devices.write(&mut token); - let (buffer_guard, _) = hub.buffers.read(&mut token); - let device = device_guard - .get_mut(queue_id) - .map_err(|_| DeviceError::Invalid)?; - - device - .pending_writes - .dst_buffers - .extend(required_buffer_inits.keys()); - let pending_writes_cmd_buf = device.borrow_pending_writes(); - - for (buffer_id, ranges) in required_buffer_inits { - let buffer_raw = &buffer_guard - .get(buffer_id) - .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_id))? - .raw - .as_ref() - .ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))? - .0; - - // TODO: Currently, we implement zero init only via fill_buffer. Should we implement this also via map & shader/storage? 
- // Note that this requires TRANSFER usage on the buffer - see also device.create_buffer - - // TODO: Collapse ranges - for range in ranges { - // TODO SHIPSTOPPER: Insert necessary barriers!! - unsafe { - pending_writes_cmd_buf.fill_buffer( - buffer_raw, - hal::buffer::SubRange { - offset: range.start, - size: Some(range.end - range.start), - }, - 0, - ); - } - } - } - } - } - let callbacks = { let (mut device_guard, mut token) = hub.devices.write(&mut token); let device = device_guard From 492027fe6e390874b2ae987ea192fa456e474bb7 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 25 Jan 2021 22:16:33 +0100 Subject: [PATCH 09/18] command buffer buffer memory init tracking is only filled if necessary now --- wgpu-core/src/binding_model.rs | 4 +- wgpu-core/src/command/allocator.rs | 2 +- wgpu-core/src/command/bundle.rs | 18 ++-- wgpu-core/src/command/compute.rs | 40 ++++++--- wgpu-core/src/command/mod.rs | 15 +--- wgpu-core/src/command/render.rs | 127 ++++++++++++++++++--------- wgpu-core/src/command/transfer.rs | 88 ++++++++++++------- wgpu-core/src/device/mod.rs | 8 +- wgpu-core/src/device/queue.rs | 6 +- wgpu-core/src/memory_init_tracker.rs | 12 ++- 10 files changed, 198 insertions(+), 122 deletions(-) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 1490b3dffb..dd40398ff0 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -9,7 +9,7 @@ use crate::{ }, hub::Resource, id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureViewId, Valid}, - memory_init_tracker::ResourceMemoryInitTrackerAction, + memory_init_tracker::MemoryInitTrackerAction, track::{TrackerSet, DUMMY_SELECTOR}, validation::{MissingBufferUsageError, MissingTextureUsageError}, FastHashMap, Label, LifeGuard, MultiRefCount, Stored, MAX_BIND_GROUPS, @@ -589,7 +589,7 @@ pub struct BindGroup { pub(crate) layout_id: Valid, pub(crate) life_guard: LifeGuard, pub(crate) used: TrackerSet, - pub(crate) used_buffer_ranges: Vec>, + 
pub(crate) used_buffer_ranges: Vec>, pub(crate) dynamic_binding_info: Vec, } diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs index fb75c69c19..4a021c9ab0 100644 --- a/wgpu-core/src/command/allocator.rs +++ b/wgpu-core/src/command/allocator.rs @@ -123,7 +123,7 @@ impl CommandAllocator { device_id, trackers: TrackerSet::new(B::VARIANT), used_swap_chains: Default::default(), - used_buffer_ranges: Default::default(), + buffer_memory_init_actions: Default::default(), limits, private_features, has_labels: label.is_some(), diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 357da0ab75..da37e1b447 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -49,7 +49,7 @@ use crate::{ }, hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token}, id, - memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction}, resource::BufferUse, span, track::{TrackerSet, UsageConflict}, @@ -160,7 +160,7 @@ impl RenderBundleEncoder { let mut commands = Vec::new(); let mut base = self.base.as_ref(); let mut pipeline_layout_id = None::>; - let mut used_buffer_ranges = Vec::new(); + let mut buffer_memory_init_actions = Vec::new(); for &command in base.commands { match command { @@ -206,7 +206,7 @@ impl RenderBundleEncoder { .map_pass_err(scope); } - used_buffer_ranges + buffer_memory_init_actions .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets); @@ -267,7 +267,7 @@ impl RenderBundleEncoder { Some(s) => offset + s.get(), None => buffer.size, }; - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + buffer_memory_init_actions.push(MemoryInitTrackerAction { id: buffer_id, range: offset..end, kind: MemoryInitKind::NeedsInitializedMemory, @@ -294,7 +294,7 @@ impl RenderBundleEncoder { Some(s) => offset + 
s.get(), None => buffer.size, }; - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + buffer_memory_init_actions.push(MemoryInitTrackerAction { id: buffer_id, range: offset..end, kind: MemoryInitKind::NeedsInitializedMemory, @@ -411,7 +411,7 @@ impl RenderBundleEncoder { check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) .map_pass_err(scope)?; - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + buffer_memory_init_actions.push(MemoryInitTrackerAction { id: buffer_id, range: 0..buffer.size, kind: MemoryInitKind::NeedsInitializedMemory, @@ -442,7 +442,7 @@ impl RenderBundleEncoder { .map_pass_err(scope)?; let stride = 4 * 4; // 4 integers, vertexCount, instanceCount, firstVertex, firstInstance - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + buffer_memory_init_actions.push(MemoryInitTrackerAction { id: buffer_id, range: 0..stride, kind: MemoryInitKind::NeedsInitializedMemory, @@ -482,7 +482,7 @@ impl RenderBundleEncoder { ref_count: device.life_guard.add_ref(), }, used: state.trackers, - used_buffer_ranges, + buffer_memory_init_actions, context: self.context, life_guard: LifeGuard::new(desc.label.borrow_or_default()), }) @@ -531,7 +531,7 @@ pub struct RenderBundle { base: BasePass, pub(crate) device_id: Stored, pub(crate) used: TrackerSet, - pub(crate) used_buffer_ranges: Vec>, + pub(crate) buffer_memory_init_actions: Vec>, pub(crate) context: RenderPassContext, pub(crate) life_guard: LifeGuard, } diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index f9119fedce..e9158d4c1a 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -11,7 +11,7 @@ use crate::{ }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, - memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction}, resource::{Buffer, BufferUse, Texture}, span, track::{TrackerSet, UsageConflict}, @@ -147,6 
+147,8 @@ pub enum ComputePassErrorInner { end_offset: u64, buffer_size: u64, }, + #[error("buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), #[error(transparent)] ResourceUsageConflict(#[from] UsageConflict), #[error(transparent)] @@ -330,9 +332,18 @@ impl Global { .validate_dynamic_bindings(&temp_offsets) .map_pass_err(scope)?; - cmd_buf - .used_buffer_ranges - .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); + cmd_buf.buffer_memory_init_actions.extend( + bind_group + .used_buffer_ranges + .iter() + .filter(|action| match buffer_guard.get(action.id) { + Ok(buffer) => { + !buffer.initialization_status.is_initialized(&action.range) + } + Err(_) => false, + }) + .map(|action| action.clone()), + ); if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( index as usize, @@ -519,13 +530,20 @@ impl Global { .map_pass_err(scope)?; let stride = 3 * 4; // 3 integers, x/y/z group size - cmd_buf - .used_buffer_ranges - .push(ResourceMemoryInitTrackerAction { - id: buffer_id, - range: offset..(offset + stride), - kind: MemoryInitKind::NeedsInitializedMemory, - }); + + let used_buffer_range = offset..(offset + stride); + if !indirect_buffer + .initialization_status + .is_initialized(&used_buffer_range) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: buffer_id, + range: used_buffer_range, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } state .flush_states( diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 88c3df314c..edbda616e5 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -24,7 +24,7 @@ use crate::{ device::{all_buffer_stages, all_image_stages}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, - memory_init_tracker::ResourceMemoryInitTrackerAction, + memory_init_tracker::MemoryInitTrackerAction, resource::{Buffer, Texture}, span, track::TrackerSet, @@ -47,7 +47,7 @@ pub struct 
CommandBuffer { pub(crate) device_id: Stored, pub(crate) trackers: TrackerSet, pub(crate) used_swap_chains: SmallVec<[Stored; 1]>, - pub(crate) used_buffer_ranges: Vec>, + pub(crate) buffer_memory_init_actions: Vec>, limits: wgt::Limits, private_features: PrivateFeatures, has_labels: bool, @@ -58,17 +58,6 @@ pub struct CommandBuffer { } impl CommandBuffer { - fn get_encoder( - storage: &Storage, - id: id::CommandEncoderId, - ) -> Result<&Self, CommandEncoderError> { - match storage.get(id) { - Ok(cmd_buf) if cmd_buf.is_recording => Ok(cmd_buf), - Ok(_) => Err(CommandEncoderError::NotRecording), - Err(_) => Err(CommandEncoderError::Invalid), - } - } - fn get_encoder_mut( storage: &mut Storage, id: id::CommandEncoderId, diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index bf54a24a59..20ed60a15c 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -17,7 +17,7 @@ use crate::{ }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, - memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction}, pipeline::PipelineFlags, resource::{BufferUse, Texture, TextureUse, TextureView, TextureViewInner}, span, @@ -1025,12 +1025,12 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); - let (cmd_buf_raw, trackers, used_swapchain, used_buffer_ranges, query_reset_state) = { + let (cmd_buf_raw, trackers, used_swapchain, query_reset_state) = { // read-only lock guard - let (cmb_guard, mut token) = hub.command_buffers.read(&mut token); + let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); let cmd_buf = - CommandBuffer::get_encoder(&*cmb_guard, encoder_id).map_pass_err(scope)?; + CommandBuffer::get_encoder_mut(&mut *cmb_guard, encoder_id).map_pass_err(scope)?; let device = &device_guard[cmd_buf.device_id.value]; let mut raw = device.cmd_allocator.extend(cmd_buf); unsafe { @@ -1080,7 
+1080,6 @@ impl Global { let mut string_offset = 0; let mut active_query = None; let mut query_reset_state = QueryResetMap::new(); - let mut used_buffer_ranges = Vec::new(); for command in base.commands { match *command { @@ -1119,8 +1118,18 @@ impl Global { .merge_extend(&bind_group.used) .map_pass_err(scope)?; - used_buffer_ranges - .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); + cmd_buf.buffer_memory_init_actions.extend( + bind_group + .used_buffer_ranges + .iter() + .filter(|action| match buffer_guard.get(action.id) { + Ok(buffer) => { + !buffer.initialization_status.is_initialized(&action.range) + } + Err(_) => false, + }) + .map(|action| action.clone()), + ); if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( index as usize, @@ -1298,11 +1307,15 @@ impl Global { state.index.format = Some(index_format); state.index.update_limit(); - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { - id: buffer_id, - range: offset..end, - kind: MemoryInitKind::NeedsInitializedMemory, - }); + if !buffer.initialization_status.is_initialized(&(offset..end)) { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: buffer_id, + range: offset..end, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } let range = hal::buffer::SubRange { offset, @@ -1347,11 +1360,16 @@ impl Global { }; vertex_state.bound = true; - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { - id: buffer_id, - range: offset..(offset + vertex_state.total_size), - kind: MemoryInitKind::NeedsInitializedMemory, - }); + let used_range = offset..(offset + vertex_state.total_size); + if !buffer.initialization_status.is_initialized(&used_range) { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: buffer_id, + range: used_range, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } let range = hal::buffer::SubRange { offset, @@ -1605,11 +1623,18 @@ impl Global { .map_pass_err(scope); } - 
used_buffer_ranges.push(ResourceMemoryInitTrackerAction { - id: buffer_id, - range: offset..end_offset, - kind: MemoryInitKind::NeedsInitializedMemory, - }); + if !indirect_buffer + .initialization_status + .is_initialized(&(offset..end_offset)) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: buffer_id, + range: offset..end_offset, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } match indexed { false => unsafe { @@ -1694,6 +1719,18 @@ impl Global { }) .map_pass_err(scope); } + if !indirect_buffer + .initialization_status + .is_initialized(&(offset..end_offset)) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: buffer_id, + range: offset..end_offset, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } let begin_count_offset = count_buffer_offset; let end_count_offset = count_buffer_offset + 4; @@ -1705,17 +1742,18 @@ impl Global { }) .map_pass_err(scope); } - - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { - id: buffer_id, - range: offset..end_offset, - kind: MemoryInitKind::NeedsInitializedMemory, - }); - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { - id: count_buffer_id, - range: count_buffer_offset..end_count_offset, - kind: MemoryInitKind::NeedsInitializedMemory, - }); + if !count_buffer + .initialization_status + .is_initialized(&(count_buffer_offset..end_count_offset)) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: count_buffer_id, + range: count_buffer_offset..end_count_offset, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } match indexed { false => unsafe { @@ -1849,8 +1887,18 @@ impl Global { .map_err(RenderPassErrorInner::IncompatibleRenderBundle) .map_pass_err(scope)?; - used_buffer_ranges - .extend(bundle.used_buffer_ranges.iter().map(|x| x.clone())); + cmd_buf.buffer_memory_init_actions.extend( + bundle + .buffer_memory_init_actions + .iter() + .filter(|action| match buffer_guard.get(action.id) { + 
Ok(buffer) => { + !buffer.initialization_status.is_initialized(&action.range) + } + Err(_) => false, + }) + .map(|action| action.clone()), + ); unsafe { bundle.execute( @@ -1882,13 +1930,7 @@ impl Global { } let (trackers, used_swapchain) = info.finish(&*texture_guard).map_pass_err(scope)?; - ( - raw, - trackers, - used_swapchain, - used_buffer_ranges, - query_reset_state, - ) + (raw, trackers, used_swapchain, query_reset_state) }; let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); @@ -1899,7 +1941,6 @@ impl Global { CommandBuffer::get_encoder_mut(&mut *cmb_guard, encoder_id).map_pass_err(scope)?; cmd_buf.has_labels |= base.label.is_some(); cmd_buf.used_swap_chains.extend(used_swapchain); - cmd_buf.used_buffer_ranges.extend(used_buffer_ranges); #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 2f59ca37d3..23b99cc390 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -10,7 +10,7 @@ use crate::{ device::{all_buffer_stages, all_image_stages}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id::{BufferId, CommandEncoderId, TextureId}, - memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction}, + memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction}, resource::{BufferUse, Texture, TextureErrorDimension, TextureUse}, span, track::TextureSelector, @@ -404,20 +404,32 @@ impl Global { } // Make sure source is initialized memory and mark dest as initialized. 
- cmd_buf - .used_buffer_ranges - .push(ResourceMemoryInitTrackerAction { - id: destination, - range: destination_offset..(destination_offset + size), - kind: MemoryInitKind::ImplicitlyInitialized, - }); - cmd_buf - .used_buffer_ranges - .push(ResourceMemoryInitTrackerAction { - id: source, - range: source_offset..(source_offset + size), - kind: MemoryInitKind::NeedsInitializedMemory, - }); + let used_dst_buffer_range = destination_offset..(destination_offset + size); + if !dst_buffer + .initialization_status + .is_initialized(&used_dst_buffer_range) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: destination, + range: used_dst_buffer_range, + kind: MemoryInitKind::ImplicitlyInitialized, + }); + } + let used_src_buffer_range = source_offset..(source_offset + size); + if !src_buffer + .initialization_status + .is_initialized(&used_src_buffer_range) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: source, + range: used_src_buffer_range, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } let region = hal::command::BufferCopy { src: source_offset, @@ -532,14 +544,20 @@ impl Global { copy_size, )?; - cmd_buf - .used_buffer_ranges - .push(ResourceMemoryInitTrackerAction { - id: source.buffer, - range: source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), - kind: MemoryInitKind::NeedsInitializedMemory, - }); - // TODO: Mark dest texture memory as implicitly initialized here. 
+ let used_src_buffer_range = + source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy); + if !src_buffer + .initialization_status + .is_initialized(&used_src_buffer_range) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: source.buffer, + range: used_src_buffer_range, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + } let (block_width, _) = dst_texture.format.describe().block_dimensions; if !conv::is_valid_copy_dst_texture_format(dst_texture.format) { @@ -688,16 +706,20 @@ impl Global { ))? } - cmd_buf - .used_buffer_ranges - .push(ResourceMemoryInitTrackerAction { - id: destination.buffer, - range: destination.layout.offset - ..(destination.layout.offset + required_buffer_bytes_in_copy), - kind: MemoryInitKind::ImplicitlyInitialized, - }); - // TODO: Mark dest texture memory as required to be initialized here. - + let used_dst_buffer_range = + destination.layout.offset..(destination.layout.offset + required_buffer_bytes_in_copy); + if !dst_buffer + .initialization_status + .is_initialized(&used_dst_buffer_range) + { + cmd_buf + .buffer_memory_init_actions + .push(MemoryInitTrackerAction { + id: destination.buffer, + range: used_dst_buffer_range, + kind: MemoryInitKind::ImplicitlyInitialized, + }); + } // WebGPU uses the physical size of the texture for copies whereas vulkan uses // the virtual size. We have passed validation, so it's safe to use the // image extent data directly. 
We want the provided copy size to be no larger than diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index fb61d4b823..64a0b199d3 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -211,7 +211,7 @@ fn map_buffer( if let Some(uninitialized_ranges) = buffer .initialization_status - .drain_uninitialized_ranges(offset..(size + offset)) + .drain_uninitialized_ranges(&(offset..(size + offset))) { for uninitialized_range in uninitialized_ranges { let num_bytes = uninitialized_range.end - uninitialized_range.start; @@ -1366,7 +1366,7 @@ impl Device { return Err(Error::BindingZeroSize(bb.buffer_id)); } - used_buffer_ranges.push(ResourceMemoryInitTrackerAction { + used_buffer_ranges.push(MemoryInitTrackerAction { id: bb.buffer_id, range: bb.offset..(bb.offset + bind_size), kind: MemoryInitKind::NeedsInitializedMemory, @@ -2611,12 +2611,12 @@ impl Global { unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) }; buffer .initialization_status - .drain_uninitialized_ranges(0..buffer.size) + .drain_uninitialized_ranges(&(0..buffer.size)) .unwrap() .for_each(drop); stage .initialization_status - .drain_uninitialized_ranges(0..buffer.size) + .drain_uninitialized_ranges(&(0..buffer.size)) .unwrap() .for_each(drop); diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index e454ee3d80..e59ad116ff 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -277,7 +277,7 @@ impl Global { let dst = buffer_guard.get_mut(buffer_id).unwrap(); if let Some(uninitialized_ranges) = dst .initialization_status - .drain_uninitialized_ranges(buffer_offset..(buffer_offset + data_size)) + .drain_uninitialized_ranges(&(buffer_offset..(buffer_offset + data_size))) { uninitialized_ranges.for_each(drop); } @@ -498,14 +498,14 @@ impl Global { .get(cmb_id) .map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?; - for buffer_use in cmdbuf.used_buffer_ranges.iter() { + for buffer_use in 
cmdbuf.buffer_memory_init_actions.iter() { let buffer = buffer_guard .get_mut(buffer_use.id) .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?; if let Some(uninitialized_ranges) = buffer .initialization_status - .drain_uninitialized_ranges(buffer_use.range.clone()) + .drain_uninitialized_ranges(&buffer_use.range) { match buffer_use.kind { MemoryInitKind::ImplicitlyInitialized => { diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index edc510fef8..4277e4efae 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -2,14 +2,14 @@ use std::ops::Range; #[derive(Debug, Clone)] pub(crate) enum MemoryInitKind { - // The memory range is going to be written by an already initialized source, thus doesn't need extra attention. + // The memory range is going to be written by an already initialized source, thus doesn't need extra attention other than marking as initialized. ImplicitlyInitialized, // The memory range is going to be read, therefore needs to ensure prior initialization. 
NeedsInitializedMemory, } #[derive(Debug, Clone)] -pub(crate) struct ResourceMemoryInitTrackerAction { +pub(crate) struct MemoryInitTrackerAction { pub(crate) id: ResourceId, pub(crate) range: Range, pub(crate) kind: MemoryInitKind, @@ -34,10 +34,16 @@ impl MemoryInitTracker { } } + pub(crate) fn is_initialized(&self, range: &Range) -> bool { + self.uninitialized_ranges + .allocated_ranges() + .all(|r: Range| r.start >= range.end || r.end <= range.start) + } + #[must_use] pub(crate) fn drain_uninitialized_ranges<'a>( &'a mut self, - range: Range, + range: &Range, ) -> Option> + 'a> { let mut uninitialized_ranges: Vec> = self .uninitialized_ranges From 32b4e32ac67953f8e7961eca08d93b1e2bf71ac9 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 26 Jan 2021 22:17:04 +0100 Subject: [PATCH 10/18] MemoryInitTracker tests and interface adjustments --- wgpu-core/src/device/mod.rs | 27 ++++---- wgpu-core/src/device/queue.rs | 31 ++++----- wgpu-core/src/memory_init_tracker.rs | 96 +++++++++++++++++++++++++--- 3 files changed, 112 insertions(+), 42 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 64a0b199d3..58067f953e 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -208,23 +208,20 @@ fn map_buffer( // // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. 
- - if let Some(uninitialized_ranges) = buffer + for uninitialized_range in buffer .initialization_status .drain_uninitialized_ranges(&(offset..(size + offset))) { - for uninitialized_range in uninitialized_ranges { - let num_bytes = uninitialized_range.end - uninitialized_range.start; - unsafe { - ptr::write_bytes( - ptr.as_ptr().offset(uninitialized_range.start as isize), - 0, - num_bytes as usize, - ) - }; - if zero_init_needs_flush_now { - block.flush_range(raw, uninitialized_range.start, Some(num_bytes))?; - } + let num_bytes = uninitialized_range.end - uninitialized_range.start; + unsafe { + ptr::write_bytes( + ptr.as_ptr().offset(uninitialized_range.start as isize), + 0, + num_bytes as usize, + ) + }; + if zero_init_needs_flush_now { + block.flush_range(raw, uninitialized_range.start, Some(num_bytes))?; } } @@ -2612,12 +2609,10 @@ impl Global { buffer .initialization_status .drain_uninitialized_ranges(&(0..buffer.size)) - .unwrap() .for_each(drop); stage .initialization_status .drain_uninitialized_ranges(&(0..buffer.size)) - .unwrap() .for_each(drop); buffer.map_state = resource::BufferMapState::Init { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index e59ad116ff..4690099407 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -275,12 +275,9 @@ impl Global { // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. 
{ let dst = buffer_guard.get_mut(buffer_id).unwrap(); - if let Some(uninitialized_ranges) = dst - .initialization_status + dst.initialization_status .drain_uninitialized_ranges(&(buffer_offset..(buffer_offset + data_size))) - { - uninitialized_ranges.for_each(drop); - } + .for_each(drop); } Ok(()) @@ -503,20 +500,18 @@ impl Global { .get_mut(buffer_use.id) .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?; - if let Some(uninitialized_ranges) = buffer + let uninitialized_ranges = buffer .initialization_status - .drain_uninitialized_ranges(&buffer_use.range) - { - match buffer_use.kind { - MemoryInitKind::ImplicitlyInitialized => { - uninitialized_ranges.for_each(drop); - } - MemoryInitKind::NeedsInitializedMemory => { - required_buffer_inits - .entry(buffer_use.id) - .or_default() - .extend(uninitialized_ranges); - } + .drain_uninitialized_ranges(&buffer_use.range); + match buffer_use.kind { + MemoryInitKind::ImplicitlyInitialized => { + uninitialized_ranges.for_each(drop); + } + MemoryInitKind::NeedsInitializedMemory => { + required_buffer_inits + .entry(buffer_use.id) + .or_default() + .extend(uninitialized_ranges); } } } diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index 4277e4efae..ebbf0d7f80 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -44,7 +44,7 @@ impl MemoryInitTracker { pub(crate) fn drain_uninitialized_ranges<'a>( &'a mut self, range: &Range, - ) -> Option> + 'a> { + ) -> impl Iterator> + 'a { let mut uninitialized_ranges: Vec> = self .uninitialized_ranges .allocated_ranges() @@ -60,11 +60,7 @@ impl MemoryInitTracker { }) .collect(); - if uninitialized_ranges.is_empty() { - return None; - } - - Some(std::iter::from_fn(move || { + std::iter::from_fn(move || { let range: Option> = uninitialized_ranges.last().map(|r| r.clone()); match range { @@ -76,8 +72,92 @@ impl MemoryInitTracker { } None => None, } - })) + }) } } -// TODO: Add some unit tests for 
this construct +#[cfg(test)] +mod test { + use std::ops::Range; + + use super::MemoryInitTracker; + + #[test] + fn is_initialized_for_empty_tracker() { + let tracker = MemoryInitTracker::new(10); + assert!(!tracker.is_initialized(&(0..10))); + assert!(!tracker.is_initialized(&(0..3))); + assert!(!tracker.is_initialized(&(3..4))); + assert!(!tracker.is_initialized(&(4..10))); + } + + #[test] + fn is_initialized_for_filled_tracker() { + let mut tracker = MemoryInitTracker::new(10); + tracker.drain_uninitialized_ranges(&(0..10)).for_each(drop); + assert!(tracker.is_initialized(&(0..10))); + assert!(tracker.is_initialized(&(0..3))); + assert!(tracker.is_initialized(&(3..4))); + assert!(tracker.is_initialized(&(4..10))); + } + + #[test] + fn is_initialized_for_partially_filled_tracker() { + let mut tracker = MemoryInitTracker::new(10); + tracker.drain_uninitialized_ranges(&(4..6)).for_each(drop); + assert!(!tracker.is_initialized(&(0..10))); // entire range + assert!(!tracker.is_initialized(&(0..4))); // left non-overlapping + assert!(!tracker.is_initialized(&(3..5))); // left overlapping + assert!(tracker.is_initialized(&(4..6))); // entire initialized range + assert!(tracker.is_initialized(&(4..5))); // left part + assert!(tracker.is_initialized(&(5..6))); // right part + assert!(!tracker.is_initialized(&(5..7))); // right overlapping + assert!(!tracker.is_initialized(&(7..10))); // right non-overlapping + } + + #[test] + fn drain_uninitialized_ranges_never_returns_ranges_twice_for_same_range() { + let mut tracker = MemoryInitTracker::new(19); + assert_eq!(tracker.drain_uninitialized_ranges(&(0..19)).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(&(0..19)).count(), 0); + + let mut tracker = MemoryInitTracker::new(17); + assert_eq!(tracker.drain_uninitialized_ranges(&(5..8)).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(&(5..8)).count(), 0); + assert_eq!(tracker.drain_uninitialized_ranges(&(1..3)).count(), 1); + 
assert_eq!(tracker.drain_uninitialized_ranges(&(1..3)).count(), 0); + assert_eq!(tracker.drain_uninitialized_ranges(&(7..13)).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(&(7..13)).count(), 0); + } + + #[test] + fn drain_uninitialized_ranges_splits_ranges_correctly() { + let mut tracker = MemoryInitTracker::new(1337); + assert_eq!( + tracker + .drain_uninitialized_ranges(&(21..42)) + .collect::>>(), + vec![21..42] + ); + assert_eq!( + tracker + .drain_uninitialized_ranges(&(900..1000)) + .collect::>>(), + vec![900..1000] + ); + + // Split ranges. + assert_eq!( + tracker + .drain_uninitialized_ranges(&(5..1003)) + .collect::>>(), + vec![1000..1003, 42..900, 5..21] + ); + assert_eq!( + tracker + .drain_uninitialized_ranges(&(0..1337)) + .collect::>>(), + vec![1003..1337, 0..5] + ); + } +} From 216dcf949bf2691d2f8e16b962c813ef08e09385 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 27 Jan 2021 22:47:19 +0100 Subject: [PATCH 11/18] MemoryInitTracker has now a custom impl that doesn't depend on range-alloc --- Cargo.lock | 1 - wgpu-core/Cargo.toml | 1 - wgpu-core/src/device/mod.rs | 6 +- wgpu-core/src/device/queue.rs | 4 +- wgpu-core/src/memory_init_tracker.rs | 149 ++++++++++++++++++---------- 5 files changed, 96 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3b63ac9f7c..90c4ab1840 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1807,7 +1807,6 @@ dependencies = [ "loom", "naga", "parking_lot", - "range-alloc", "raw-window-handle", "ron", "serde", diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 8bdb81c342..453d779c68 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -38,7 +38,6 @@ gpu-descriptor = { version = "0.1", features = ["tracing"] } hal = { package = "gfx-hal", git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8" } gfx-backend-empty = { git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8" } -range-alloc = {git = 
"https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8"} [target.'cfg(all(not(target_arch = "wasm32"), all(unix, not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] gfx-backend-vulkan = { git = "https://github.com/gfx-rs/gfx", rev = "2fd74dbe1562a3eef05b11dcd300c1c9c9bc12a8", features = ["naga"] } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 58067f953e..c2cf35b0b3 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -210,7 +210,7 @@ fn map_buffer( let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. for uninitialized_range in buffer .initialization_status - .drain_uninitialized_ranges(&(offset..(size + offset))) + .drain_uninitialized_ranges(offset..(size + offset)) { let num_bytes = uninitialized_range.end - uninitialized_range.start; unsafe { @@ -2608,11 +2608,11 @@ impl Global { unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) }; buffer .initialization_status - .drain_uninitialized_ranges(&(0..buffer.size)) + .drain_uninitialized_ranges(0..buffer.size) .for_each(drop); stage .initialization_status - .drain_uninitialized_ranges(&(0..buffer.size)) + .drain_uninitialized_ranges(0..buffer.size) .for_each(drop); buffer.map_state = resource::BufferMapState::Init { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 4690099407..87e7c7f672 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -276,7 +276,7 @@ impl Global { { let dst = buffer_guard.get_mut(buffer_id).unwrap(); dst.initialization_status - .drain_uninitialized_ranges(&(buffer_offset..(buffer_offset + data_size))) + .drain_uninitialized_ranges(buffer_offset..(buffer_offset + data_size)) .for_each(drop); } @@ -502,7 +502,7 @@ impl Global { let uninitialized_ranges = buffer .initialization_status - .drain_uninitialized_ranges(&buffer_use.range); + 
.drain_uninitialized_ranges(buffer_use.range.clone()); match buffer_use.kind { MemoryInitKind::ImplicitlyInitialized => { uninitialized_ranges.for_each(drop); diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index ebbf0d7f80..2e9780877d 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -18,69 +18,102 @@ pub(crate) struct MemoryInitTrackerAction { /// Tracks initialization status of a linear range from 0..size #[derive(Debug)] pub(crate) struct MemoryInitTracker { - // TODO: Use a more fitting data structure! - // An allocated range in this allocator means that the range in question is NOT yet initialized. - uninitialized_ranges: range_alloc::RangeAllocator, + uninitialized_ranges: Vec>, +} + +pub(crate) struct MemoryInitTrackerDrain<'a> { + uninitialized_ranges: &'a mut Vec>, + drain_range: Range, + next_index: usize, +} + +impl<'a> Iterator for MemoryInitTrackerDrain<'a> { + type Item = Range; + + fn next(&mut self) -> Option { + let uninitialized_range = match self.uninitialized_ranges.get_mut(self.next_index) { + Some(range) => range, + None => return None, + }; + if uninitialized_range.start >= self.drain_range.end { + // No more cuts possible (we're going left to right!) 
+ None + } else if uninitialized_range.end > self.drain_range.end { + // cut-out / split + if uninitialized_range.start < self.drain_range.start { + let old_start = uninitialized_range.start; + uninitialized_range.start = self.drain_range.end; + self.uninitialized_ranges + .insert(self.next_index, old_start..self.drain_range.start); + self.next_index = std::usize::MAX; + Some(self.drain_range.clone()) + } + // right cut + else { + let result = uninitialized_range.start..self.drain_range.end; + self.next_index = std::usize::MAX; + uninitialized_range.start = self.drain_range.end; + Some(result) + } + } else { + // left cut + if uninitialized_range.start < self.drain_range.start { + let result = self.drain_range.start..uninitialized_range.end; + uninitialized_range.end = self.drain_range.start; + self.next_index = self.next_index + 1; + Some(result) + } + // fully contained. + else { + let result = uninitialized_range.clone(); + self.uninitialized_ranges.remove(self.next_index); + Some(result) + } + } + } } impl MemoryInitTracker { pub(crate) fn new(size: wgt::BufferAddress) -> Self { - let mut uninitialized_ranges = - range_alloc::RangeAllocator::::new(0..size); - let _ = uninitialized_ranges.allocate_range(size); - Self { - uninitialized_ranges, + uninitialized_ranges: vec![0..size], } } - pub(crate) fn is_initialized(&self, range: &Range) -> bool { - self.uninitialized_ranges - .allocated_ranges() - .all(|r: Range| r.start >= range.end || r.end <= range.start) + pub(crate) fn is_initialized(&self, query_range: &Range) -> bool { + match self + .uninitialized_ranges + .iter() + .find(|r| r.end > query_range.start) + { + Some(r) => r.start >= query_range.end, + None => true, + } } #[must_use] pub(crate) fn drain_uninitialized_ranges<'a>( &'a mut self, - range: &Range, - ) -> impl Iterator> + 'a { - let mut uninitialized_ranges: Vec> = self + drain_range: Range, + ) -> MemoryInitTrackerDrain<'a> { + let next_index = self .uninitialized_ranges - .allocated_ranges() - 
.filter_map(|r: Range| { - if r.end > range.start && r.start < range.end { - Some(Range { - start: range.start.max(r.start), - end: range.end.min(r.end), - }) - } else { - None - } - }) - .collect(); + .iter() + .position(|r| r.end > drain_range.start) + .unwrap_or(std::usize::MAX); - std::iter::from_fn(move || { - let range: Option> = - uninitialized_ranges.last().map(|r| r.clone()); - match range { - Some(range) => { - uninitialized_ranges.pop(); - let result = range.clone(); - self.uninitialized_ranges.free_range(range); - Some(result) - } - None => None, - } - }) + MemoryInitTrackerDrain { + next_index, + drain_range, + uninitialized_ranges: &mut self.uninitialized_ranges, + } } } #[cfg(test)] mod test { - use std::ops::Range; - use super::MemoryInitTracker; + use std::ops::Range; #[test] fn is_initialized_for_empty_tracker() { @@ -94,7 +127,7 @@ mod test { #[test] fn is_initialized_for_filled_tracker() { let mut tracker = MemoryInitTracker::new(10); - tracker.drain_uninitialized_ranges(&(0..10)).for_each(drop); + tracker.drain_uninitialized_ranges(0..10).for_each(drop); assert!(tracker.is_initialized(&(0..10))); assert!(tracker.is_initialized(&(0..3))); assert!(tracker.is_initialized(&(3..4))); @@ -104,7 +137,7 @@ mod test { #[test] fn is_initialized_for_partially_filled_tracker() { let mut tracker = MemoryInitTracker::new(10); - tracker.drain_uninitialized_ranges(&(4..6)).for_each(drop); + tracker.drain_uninitialized_ranges(4..6).for_each(drop); assert!(!tracker.is_initialized(&(0..10))); // entire range assert!(!tracker.is_initialized(&(0..4))); // left non-overlapping assert!(!tracker.is_initialized(&(3..5))); // left overlapping @@ -118,16 +151,16 @@ mod test { #[test] fn drain_uninitialized_ranges_never_returns_ranges_twice_for_same_range() { let mut tracker = MemoryInitTracker::new(19); - assert_eq!(tracker.drain_uninitialized_ranges(&(0..19)).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(&(0..19)).count(), 0); + 
assert_eq!(tracker.drain_uninitialized_ranges(0..19).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(0..19).count(), 0); let mut tracker = MemoryInitTracker::new(17); - assert_eq!(tracker.drain_uninitialized_ranges(&(5..8)).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(&(5..8)).count(), 0); - assert_eq!(tracker.drain_uninitialized_ranges(&(1..3)).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(&(1..3)).count(), 0); - assert_eq!(tracker.drain_uninitialized_ranges(&(7..13)).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(&(7..13)).count(), 0); + assert_eq!(tracker.drain_uninitialized_ranges(5..8).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(5..8).count(), 0); + assert_eq!(tracker.drain_uninitialized_ranges(1..3).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(1..3).count(), 0); + assert_eq!(tracker.drain_uninitialized_ranges(7..13).count(), 1); + assert_eq!(tracker.drain_uninitialized_ranges(7..13).count(), 0); } #[test] @@ -135,13 +168,13 @@ mod test { let mut tracker = MemoryInitTracker::new(1337); assert_eq!( tracker - .drain_uninitialized_ranges(&(21..42)) + .drain_uninitialized_ranges(21..42) .collect::>>(), vec![21..42] ); assert_eq!( tracker - .drain_uninitialized_ranges(&(900..1000)) + .drain_uninitialized_ranges(900..1000) .collect::>>(), vec![900..1000] ); @@ -149,15 +182,15 @@ mod test { // Splitted ranges. 
assert_eq!( tracker - .drain_uninitialized_ranges(&(5..1003)) + .drain_uninitialized_ranges(5..1003) .collect::>>(), - vec![1000..1003, 42..900, 5..21] + vec![5..21, 42..900, 1000..1003] ); assert_eq!( tracker - .drain_uninitialized_ranges(&(0..1337)) + .drain_uninitialized_ranges(0..1337) .collect::>>(), - vec![1003..1337, 0..5] + vec![0..5, 1003..1337] ); } } From cc183c885f1412fce021d14d4d48f203c1bc0fdc Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 27 Jan 2021 23:59:52 +0100 Subject: [PATCH 12/18] Added player test for buffer-zero-init --- player/tests/data/all.ron | 1 + player/tests/data/buffer-zero-init.ron | 70 ++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 player/tests/data/buffer-zero-init.ron diff --git a/player/tests/data/all.ron b/player/tests/data/all.ron index ef76e7dac8..0b493a9b83 100644 --- a/player/tests/data/all.ron +++ b/player/tests/data/all.ron @@ -2,6 +2,7 @@ backends: (bits: 0xF), tests: [ "buffer-copy.ron", + "buffer-zero-init.ron", "bind-group.ron", "quad.ron", ], diff --git a/player/tests/data/buffer-zero-init.ron b/player/tests/data/buffer-zero-init.ron new file mode 100644 index 0000000000..90a9f0c7b4 --- /dev/null +++ b/player/tests/data/buffer-zero-init.ron @@ -0,0 +1,70 @@ +( + features: (bits: 0x0), + expectations: [ + ( + name: "mapped_at_creation: false, with MAP_WRITE", + buffer: (index: 0, epoch: 1), + offset: 0, + data: Raw([0x00, 0x00, 0x00, 0x00]), + ), + ( + name: "mapped_at_creation: false, without MAP_WRITE", + buffer: (index: 1, epoch: 1), + offset: 0, + data: Raw([0x00, 0x00, 0x00, 0x00]), + ), + ( + name: "partially written buffer", + buffer: (index: 2, epoch: 1), + offset: 0, + data: Raw([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00]), + ), + ], + actions: [ + CreateBuffer( + Id(0, 1, Empty), + ( + label: Some("mapped_at_creation: false, with MAP_WRITE"), + size: 16, + usage: ( + 
bits: 131, // STORAGE + MAP_READ + MAP_WRITE + ), + mapped_at_creation: false, + ), + ), + CreateBuffer( + Id(1, 1, Empty), + ( + label: Some("mapped_at_creation: false, without MAP_WRITE"), + size: 16, + usage: ( + bits: 129, // STORAGE + MAP_READ + ), + mapped_at_creation: false, + ), + ), + CreateBuffer( + Id(2, 1, Empty), + ( + label: Some("partially written"), + size: 24, + usage: ( + bits: 9, // MAP_READ + COPY_DST + ), + mapped_at_creation: false, + ), + ), + WriteBuffer( + id: Id(2, 1, Empty), + data: "data1.bin", + range: ( + start: 4, + end: 20, + ), + queued: true, + ), + + // TODO: Add a buffer with `mapped_at_creation: true`. As of writing there's no unmap action we could insert here. + Submit(1, []), + ], +) \ No newline at end of file From da86b3f401a4b2fbe3184a87c2a7551d351cb011 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 30 Jan 2021 09:59:51 +0100 Subject: [PATCH 13/18] Cleanup, better use of some rust std lib functions --- wgpu-core/src/command/bundle.rs | 3 +-- wgpu-core/src/command/compute.rs | 2 +- wgpu-core/src/command/render.rs | 4 ++-- wgpu-core/src/device/mod.rs | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index da37e1b447..9f847d86cd 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -206,8 +206,7 @@ impl RenderBundleEncoder { .map_pass_err(scope); } - buffer_memory_init_actions - .extend(bind_group.used_buffer_ranges.iter().map(|x| x.clone())); + buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges); state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets); state diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index e9158d4c1a..bd83217673 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -342,7 +342,7 @@ impl Global { } Err(_) => false, }) - .map(|action| action.clone()), + .cloned(), ); if let 
Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 20ed60a15c..32351b4aa1 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -1128,7 +1128,7 @@ impl Global { } Err(_) => false, }) - .map(|action| action.clone()), + .cloned(), ); if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( @@ -1897,7 +1897,7 @@ impl Global { } Err(_) => false, }) - .map(|action| action.clone()), + .cloned(), ); unsafe { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index c2cf35b0b3..964b64e223 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -9,7 +9,7 @@ use crate::{ GfxBackend, Global, GlobalIdentityHandlerFactory, Hub, Input, InvalidId, Storage, Token, }, id, instance, - memory_init_tracker::*, + memory_init_tracker::{MemoryInitKind, MemoryInitTracker, MemoryInitTrackerAction}, pipeline, resource, span, swap_chain, track::{BufferState, TextureSelector, TextureState, TrackerSet}, validation::{self, check_buffer_usage, check_texture_usage}, From 31d292b1694098e74c1ecedbbd41e5d272cbb43e Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 30 Jan 2021 10:09:33 +0100 Subject: [PATCH 14/18] added clear helper method to memory_init_tracker, renamed drain --- wgpu-core/src/device/mod.rs | 15 +++-------- wgpu-core/src/device/queue.rs | 8 +++--- wgpu-core/src/memory_init_tracker.rs | 40 ++++++++++++++++------------ 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 964b64e223..0cc5f46de3 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -208,10 +208,7 @@ fn map_buffer( // // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. 
let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. - for uninitialized_range in buffer - .initialization_status - .drain_uninitialized_ranges(offset..(size + offset)) - { + for uninitialized_range in buffer.initialization_status.drain(offset..(size + offset)) { let num_bytes = uninitialized_range.end - uninitialized_range.start; unsafe { ptr::write_bytes( @@ -2606,14 +2603,8 @@ impl Global { // Zero initialize memory and then mark both staging and buffer as initialized // (it's guaranteed that this is the case by the time the buffer is usable) unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) }; - buffer - .initialization_status - .drain_uninitialized_ranges(0..buffer.size) - .for_each(drop); - stage - .initialization_status - .drain_uninitialized_ranges(0..buffer.size) - .for_each(drop); + buffer.initialization_status.clear(0..buffer.size); + stage.initialization_status.clear(0..buffer.size); buffer.map_state = resource::BufferMapState::Init { ptr, diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 87e7c7f672..d89ce3bdf7 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -276,8 +276,7 @@ impl Global { { let dst = buffer_guard.get_mut(buffer_id).unwrap(); dst.initialization_status - .drain_uninitialized_ranges(buffer_offset..(buffer_offset + data_size)) - .for_each(drop); + .clear(buffer_offset..(buffer_offset + data_size)); } Ok(()) @@ -500,9 +499,8 @@ impl Global { .get_mut(buffer_use.id) .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?; - let uninitialized_ranges = buffer - .initialization_status - .drain_uninitialized_ranges(buffer_use.range.clone()); + let uninitialized_ranges = + buffer.initialization_status.drain(buffer_use.range.clone()); match buffer_use.kind { MemoryInitKind::ImplicitlyInitialized => { uninitialized_ranges.for_each(drop); diff --git 
a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index 2e9780877d..e06e690f6b 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -91,8 +91,9 @@ impl MemoryInitTracker { } } + // Drains uninitialized ranges in a query range. #[must_use] - pub(crate) fn drain_uninitialized_ranges<'a>( + pub(crate) fn drain<'a>( &'a mut self, drain_range: Range, ) -> MemoryInitTrackerDrain<'a> { @@ -108,6 +109,11 @@ impl MemoryInitTracker { uninitialized_ranges: &mut self.uninitialized_ranges, } } + + // Clears uninitialized ranges in a query range. + pub(crate) fn clear(&mut self, drain_range: Range) { + self.drain(drain_range).for_each(drop); + } } #[cfg(test)] @@ -127,7 +133,7 @@ mod test { #[test] fn is_initialized_for_filled_tracker() { let mut tracker = MemoryInitTracker::new(10); - tracker.drain_uninitialized_ranges(0..10).for_each(drop); + tracker.clear(0..10); assert!(tracker.is_initialized(&(0..10))); assert!(tracker.is_initialized(&(0..3))); assert!(tracker.is_initialized(&(3..4))); @@ -137,7 +143,7 @@ mod test { #[test] fn is_initialized_for_partially_filled_tracker() { let mut tracker = MemoryInitTracker::new(10); - tracker.drain_uninitialized_ranges(4..6).for_each(drop); + tracker.clear(4..6); assert!(!tracker.is_initialized(&(0..10))); // entire range assert!(!tracker.is_initialized(&(0..4))); // left non-overlapping assert!(!tracker.is_initialized(&(3..5))); // left overlapping @@ -149,32 +155,32 @@ mod test { } #[test] - fn drain_uninitialized_ranges_never_returns_ranges_twice_for_same_range() { + fn drain_never_returns_ranges_twice_for_same_range() { let mut tracker = MemoryInitTracker::new(19); - assert_eq!(tracker.drain_uninitialized_ranges(0..19).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(0..19).count(), 0); + assert_eq!(tracker.drain(0..19).count(), 1); + assert_eq!(tracker.drain(0..19).count(), 0); let mut tracker = MemoryInitTracker::new(17); - 
assert_eq!(tracker.drain_uninitialized_ranges(5..8).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(5..8).count(), 0); - assert_eq!(tracker.drain_uninitialized_ranges(1..3).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(1..3).count(), 0); - assert_eq!(tracker.drain_uninitialized_ranges(7..13).count(), 1); - assert_eq!(tracker.drain_uninitialized_ranges(7..13).count(), 0); + assert_eq!(tracker.drain(5..8).count(), 1); + assert_eq!(tracker.drain(5..8).count(), 0); + assert_eq!(tracker.drain(1..3).count(), 1); + assert_eq!(tracker.drain(1..3).count(), 0); + assert_eq!(tracker.drain(7..13).count(), 1); + assert_eq!(tracker.drain(7..13).count(), 0); } #[test] - fn drain_uninitialized_ranges_splits_ranges_correctly() { + fn drain_splits_ranges_correctly() { let mut tracker = MemoryInitTracker::new(1337); assert_eq!( tracker - .drain_uninitialized_ranges(21..42) + .drain(21..42) .collect::>>(), vec![21..42] ); assert_eq!( tracker - .drain_uninitialized_ranges(900..1000) + .drain(900..1000) .collect::>>(), vec![900..1000] ); @@ -182,13 +188,13 @@ mod test { // Splitted ranges. assert_eq!( tracker - .drain_uninitialized_ranges(5..1003) + .drain(5..1003) .collect::>>(), vec![5..21, 42..900, 1000..1003] ); assert_eq!( tracker - .drain_uninitialized_ranges(0..1337) + .drain(0..1337) .collect::>>(), vec![0..5, 1003..1337] ); From 018ad05f5612c5f1418b54ce596007facf56c479 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 30 Jan 2021 20:17:15 +0100 Subject: [PATCH 15/18] memory init tracker check (previous is_initialized) clamps range now and is O(log n)! 
--- wgpu-core/src/command/compute.rs | 42 ++++---- wgpu-core/src/command/render.rs | 126 ++++++++++++------------ wgpu-core/src/command/transfer.rs | 82 +++++++--------- wgpu-core/src/memory_init_tracker.rs | 138 +++++++++++++++++++-------- 4 files changed, 220 insertions(+), 168 deletions(-) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index bd83217673..c810b327ef 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -333,16 +333,19 @@ impl Global { .map_pass_err(scope)?; cmd_buf.buffer_memory_init_actions.extend( - bind_group - .used_buffer_ranges - .iter() - .filter(|action| match buffer_guard.get(action.id) { - Ok(buffer) => { - !buffer.initialization_status.is_initialized(&action.range) - } - Err(_) => false, - }) - .cloned(), + bind_group.used_buffer_ranges.iter().filter_map( + |action| match buffer_guard.get(action.id) { + Ok(buffer) => buffer + .initialization_status + .check(action.range.clone()) + .map(|range| MemoryInitTrackerAction { + id: action.id, + range, + kind: action.kind, + }), + Err(_) => None, + }, + ), ); if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( @@ -531,19 +534,16 @@ impl Global { let stride = 3 * 4; // 3 integers, x/y/z group size - let used_buffer_range = offset..(offset + stride); - if !indirect_buffer - .initialization_status - .is_initialized(&used_buffer_range) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + indirect_buffer + .initialization_status + .check(offset..(offset + stride)) + .map(|range| MemoryInitTrackerAction { id: buffer_id, - range: used_buffer_range, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); state .flush_states( diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 32351b4aa1..cb5547750a 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ 
-1119,16 +1119,19 @@ impl Global { .map_pass_err(scope)?; cmd_buf.buffer_memory_init_actions.extend( - bind_group - .used_buffer_ranges - .iter() - .filter(|action| match buffer_guard.get(action.id) { - Ok(buffer) => { - !buffer.initialization_status.is_initialized(&action.range) - } - Err(_) => false, - }) - .cloned(), + bind_group.used_buffer_ranges.iter().filter_map(|action| { + match buffer_guard.get(action.id) { + Ok(buffer) => buffer + .initialization_status + .check(action.range.clone()) + .map(|range| MemoryInitTrackerAction { + id: action.id, + range, + kind: action.kind, + }), + Err(_) => None, + } + }), ); if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( @@ -1307,15 +1310,16 @@ impl Global { state.index.format = Some(index_format); state.index.update_limit(); - if !buffer.initialization_status.is_initialized(&(offset..end)) { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + buffer + .initialization_status + .check(offset..end) + .map(|range| MemoryInitTrackerAction { id: buffer_id, - range: offset..end, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); let range = hal::buffer::SubRange { offset, @@ -1360,16 +1364,16 @@ impl Global { }; vertex_state.bound = true; - let used_range = offset..(offset + vertex_state.total_size); - if !buffer.initialization_status.is_initialized(&used_range) { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + buffer + .initialization_status + .check(offset..(offset + vertex_state.total_size)) + .map(|range| MemoryInitTrackerAction { id: buffer_id, - range: used_range, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); let range = hal::buffer::SubRange { offset, @@ -1623,18 +1627,16 @@ impl Global { .map_pass_err(scope); } - if !indirect_buffer - .initialization_status - .is_initialized(&(offset..end_offset)) - 
{ - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + indirect_buffer + .initialization_status + .check(offset..end_offset) + .map(|range| MemoryInitTrackerAction { id: buffer_id, - range: offset..end_offset, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); match indexed { false => unsafe { @@ -1719,18 +1721,16 @@ impl Global { }) .map_pass_err(scope); } - if !indirect_buffer - .initialization_status - .is_initialized(&(offset..end_offset)) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + indirect_buffer + .initialization_status + .check(offset..end_offset) + .map(|range| MemoryInitTrackerAction { id: buffer_id, - range: offset..end_offset, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); let begin_count_offset = count_buffer_offset; let end_count_offset = count_buffer_offset + 4; @@ -1742,18 +1742,16 @@ impl Global { }) .map_pass_err(scope); } - if !count_buffer - .initialization_status - .is_initialized(&(count_buffer_offset..end_count_offset)) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + count_buffer + .initialization_status + .check(count_buffer_offset..end_count_offset) + .map(|range| MemoryInitTrackerAction { id: count_buffer_id, - range: count_buffer_offset..end_count_offset, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); match indexed { false => unsafe { @@ -1891,13 +1889,17 @@ impl Global { bundle .buffer_memory_init_actions .iter() - .filter(|action| match buffer_guard.get(action.id) { - Ok(buffer) => { - !buffer.initialization_status.is_initialized(&action.range) - } - Err(_) => false, - }) - .cloned(), + .filter_map(|action| match buffer_guard.get(action.id) { + Ok(buffer) => buffer + .initialization_status + .check(action.range.clone()) + 
.map(|range| MemoryInitTrackerAction { + id: action.id, + range, + kind: action.kind, + }), + Err(_) => None, + }), ); unsafe { diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 23b99cc390..2502236589 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -404,32 +404,26 @@ impl Global { } // Make sure source is initialized memory and mark dest as initialized. - let used_dst_buffer_range = destination_offset..(destination_offset + size); - if !dst_buffer - .initialization_status - .is_initialized(&used_dst_buffer_range) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + dst_buffer + .initialization_status + .check(destination_offset..(destination_offset + size)) + .map(|range| MemoryInitTrackerAction { id: destination, - range: used_dst_buffer_range, + range, kind: MemoryInitKind::ImplicitlyInitialized, - }); - } - let used_src_buffer_range = source_offset..(source_offset + size); - if !src_buffer - .initialization_status - .is_initialized(&used_src_buffer_range) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + }), + ); + cmd_buf.buffer_memory_init_actions.extend( + src_buffer + .initialization_status + .check(source_offset..(source_offset + size)) + .map(|range| MemoryInitTrackerAction { id: source, - range: used_src_buffer_range, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); let region = hal::command::BufferCopy { src: source_offset, @@ -544,20 +538,16 @@ impl Global { copy_size, )?; - let used_src_buffer_range = - source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy); - if !src_buffer - .initialization_status - .is_initialized(&used_src_buffer_range) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + src_buffer + .initialization_status + 
.check(source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy)) + .map(|range| MemoryInitTrackerAction { id: source.buffer, - range: used_src_buffer_range, + range, kind: MemoryInitKind::NeedsInitializedMemory, - }); - } + }), + ); let (block_width, _) = dst_texture.format.describe().block_dimensions; if !conv::is_valid_copy_dst_texture_format(dst_texture.format) { @@ -706,20 +696,20 @@ impl Global { ))? } - let used_dst_buffer_range = - destination.layout.offset..(destination.layout.offset + required_buffer_bytes_in_copy); - if !dst_buffer - .initialization_status - .is_initialized(&used_dst_buffer_range) - { - cmd_buf - .buffer_memory_init_actions - .push(MemoryInitTrackerAction { + cmd_buf.buffer_memory_init_actions.extend( + dst_buffer + .initialization_status + .check( + destination.layout.offset + ..(destination.layout.offset + required_buffer_bytes_in_copy), + ) + .map(|range| MemoryInitTrackerAction { id: destination.buffer, - range: used_dst_buffer_range, + range, kind: MemoryInitKind::ImplicitlyInitialized, - }); - } + }), + ); + // WebGPU uses the physical size of the texture for copies whereas vulkan uses // the virtual size. We have passed validation, so it's safe to use the // image extent data directly. We want the provided copy size to be no larger than diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index e06e690f6b..6874e1fcfa 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -1,6 +1,6 @@ use std::ops::Range; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub(crate) enum MemoryInitKind { // The memory range is going to be written by an already initialized source, thus doesn't need extra attention other than marking as initialized. 
ImplicitlyInitialized, @@ -18,6 +18,7 @@ pub(crate) struct MemoryInitTrackerAction { /// Tracks initialization status of a linear range from 0..size #[derive(Debug)] pub(crate) struct MemoryInitTracker { + // Ordered, non overlapping list of all uninitialized ranges. uninitialized_ranges: Vec>, } @@ -80,15 +81,59 @@ impl MemoryInitTracker { } } - pub(crate) fn is_initialized(&self, query_range: &Range) -> bool { - match self - .uninitialized_ranges - .iter() - .find(|r| r.end > query_range.start) - { - Some(r) => r.start >= query_range.end, - None => true, + // Search smallest range.end which is bigger than bound in O(log n) (with n being number of uninitialized ranges) + fn lower_bound(&self, bound: wgt::BufferAddress) -> usize { + // This is equivalent to, except that it may return an out of bounds index instead of + //self.uninitialized_ranges.iter().position(|r| r.end > bound) + + // In future Rust versions this operation can be done with partition_point + // See https://github.com/rust-lang/rust/pull/73577/ + let mut left = 0; + let mut right = self.uninitialized_ranges.len(); + + while left != right { + let mid = left + (right - left) / 2; + let value = unsafe { self.uninitialized_ranges.get_unchecked(mid) }; + + if value.end <= bound { + left = mid + 1; + } else { + right = mid; + } } + + left + } + + // Checks if there's any uninitialized ranges within a query. + // If there are any, the range returned a the subrange of the query_range that contains all these uninitialized regions. 
+ // Returned range may be larger than necessary (tradeoff for making this function O(log n)) + pub(crate) fn check( + &self, + query_range: Range, + ) -> Option> { + let index = self.lower_bound(query_range.start); + self.uninitialized_ranges + .get(index) + .map(|start_range| { + if start_range.start < query_range.end { + let start = start_range.start.max(query_range.start); + match self.uninitialized_ranges.get(index + 1) { + Some(next_range) => { + if next_range.start < query_range.end { + // Would need to keep iterating for more accurate upper bound. Don't do that here. + Some(start..query_range.end) + } else { + Some(start..start_range.end.min(query_range.end)) + } + } + None => Some(start..start_range.end.min(query_range.end)), + } + } else { + None + } + }) + .flatten() } // Drains uninitialized ranges in a query range. @@ -97,22 +142,16 @@ impl MemoryInitTracker { &'a mut self, drain_range: Range, ) -> MemoryInitTrackerDrain<'a> { - let next_index = self - .uninitialized_ranges - .iter() - .position(|r| r.end > drain_range.start) - .unwrap_or(std::usize::MAX); - MemoryInitTrackerDrain { - next_index, + next_index: self.lower_bound(drain_range.start), drain_range, uninitialized_ranges: &mut self.uninitialized_ranges, } } // Clears uninitialized ranges in a query range. 
- pub(crate) fn clear(&mut self, drain_range: Range) { - self.drain(drain_range).for_each(drop); + pub(crate) fn clear(&mut self, range: Range) { + self.drain(range).for_each(drop); } } @@ -122,36 +161,57 @@ mod test { use std::ops::Range; #[test] - fn is_initialized_for_empty_tracker() { + fn check_for_newly_created_tracker() { let tracker = MemoryInitTracker::new(10); - assert!(!tracker.is_initialized(&(0..10))); - assert!(!tracker.is_initialized(&(0..3))); - assert!(!tracker.is_initialized(&(3..4))); - assert!(!tracker.is_initialized(&(4..10))); + assert_eq!(tracker.check(0..10), Some(0..10)); + assert_eq!(tracker.check(0..3), Some(0..3)); + assert_eq!(tracker.check(3..4), Some(3..4)); + assert_eq!(tracker.check(4..10), Some(4..10)); } #[test] - fn is_initialized_for_filled_tracker() { + fn check_for_cleared_tracker() { let mut tracker = MemoryInitTracker::new(10); tracker.clear(0..10); - assert!(tracker.is_initialized(&(0..10))); - assert!(tracker.is_initialized(&(0..3))); - assert!(tracker.is_initialized(&(3..4))); - assert!(tracker.is_initialized(&(4..10))); + assert_eq!(tracker.check(0..10), None); + assert_eq!(tracker.check(0..3), None); + assert_eq!(tracker.check(3..4), None); + assert_eq!(tracker.check(4..10), None); } #[test] - fn is_initialized_for_partially_filled_tracker() { - let mut tracker = MemoryInitTracker::new(10); - tracker.clear(4..6); - assert!(!tracker.is_initialized(&(0..10))); // entire range - assert!(!tracker.is_initialized(&(0..4))); // left non-overlapping - assert!(!tracker.is_initialized(&(3..5))); // left overlapping - assert!(tracker.is_initialized(&(4..6))); // entire initialized range - assert!(tracker.is_initialized(&(4..5))); // left part - assert!(tracker.is_initialized(&(5..6))); // right part - assert!(!tracker.is_initialized(&(5..7))); // right overlapping - assert!(!tracker.is_initialized(&(7..10))); // right non-overlapping + fn check_for_partially_filled_tracker() { + let mut tracker = MemoryInitTracker::new(25); + // 
Two regions of uninitialized memory + tracker.clear(0..5); + tracker.clear(10..15); + tracker.clear(20..25); + + assert_eq!(tracker.check(0..25), Some(5..25)); // entire range + + assert_eq!(tracker.check(0..5), None); // left non-overlapping + assert_eq!(tracker.check(3..8), Some(5..8)); // left overlapping region + assert_eq!(tracker.check(3..17), Some(5..17)); // left overlapping region + contained region + + assert_eq!(tracker.check(8..22), Some(8..22)); // right overlapping region + contained region (yes, doesn't fix range end!) + assert_eq!(tracker.check(17..22), Some(17..20)); // right overlapping region + assert_eq!(tracker.check(20..25), None); // right non-overlapping + } + + #[test] + fn clear_already_cleared() { + let mut tracker = MemoryInitTracker::new(30); + tracker.clear(10..20); + + // Overlapping with non-cleared + tracker.clear(5..15); // Left overlap + tracker.clear(15..25); // Right overlap + tracker.clear(0..30); // Inner overlap + + // Clear fully cleared + tracker.clear(0..30); + + assert_eq!(tracker.check(0..30), None); } #[test] From a8460b438cea69aec5887c230a3dedaf9ceb995c Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 30 Jan 2021 22:23:23 +0100 Subject: [PATCH 16/18] MemoryInitTracker drain is now O(N) --- wgpu-core/src/memory_init_tracker.rs | 87 +++++++++++++++++----------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/wgpu-core/src/memory_init_tracker.rs b/wgpu-core/src/memory_init_tracker.rs index 6874e1fcfa..c153be3906 100644 --- a/wgpu-core/src/memory_init_tracker.rs +++ b/wgpu-core/src/memory_init_tracker.rs @@ -25,6 +25,7 @@ pub(crate) struct MemoryInitTracker { pub(crate) struct MemoryInitTrackerDrain<'a> { uninitialized_ranges: &'a mut Vec>, drain_range: Range, + first_index: usize, next_index: usize, } @@ -32,44 +33,58 @@ impl<'a> Iterator for MemoryInitTrackerDrain<'a> { type Item = Range; fn next(&mut self) -> Option { - let uninitialized_range = match 
self.uninitialized_ranges.get_mut(self.next_index) { - Some(range) => range, - None => return None, - }; - if uninitialized_range.start >= self.drain_range.end { - // No more cuts possible (we're going left to right!) - None - } else if uninitialized_range.end > self.drain_range.end { - // cut-out / split - if uninitialized_range.start < self.drain_range.start { - let old_start = uninitialized_range.start; - uninitialized_range.start = self.drain_range.end; - self.uninitialized_ranges - .insert(self.next_index, old_start..self.drain_range.start); - self.next_index = std::usize::MAX; - Some(self.drain_range.clone()) - } - // right cut - else { - let result = uninitialized_range.start..self.drain_range.end; - self.next_index = std::usize::MAX; - uninitialized_range.start = self.drain_range.end; - Some(result) - } + if let Some(r) = self + .uninitialized_ranges + .get(self.next_index) + .and_then(|range| { + if range.start < self.drain_range.end { + Some(range.clone()) + } else { + None + } + }) + { + self.next_index += 1; + Some(r.start.max(self.drain_range.start)..r.end.min(self.drain_range.end)) } else { - // left cut - if uninitialized_range.start < self.drain_range.start { - let result = self.drain_range.start..uninitialized_range.end; - uninitialized_range.end = self.drain_range.start; - self.next_index = self.next_index + 1; - Some(result) + let num_affected = self.next_index - self.first_index; + if num_affected == 0 { + return None; } - // fully contained. + + let first_range = &mut self.uninitialized_ranges[self.first_index]; + + // Split one "big" uninitialized range? + if num_affected == 1 + && first_range.start < self.drain_range.start + && first_range.end > self.drain_range.end + { + let old_start = first_range.start; + first_range.start = self.drain_range.end; + self.uninitialized_ranges + .insert(self.first_index, old_start..self.drain_range.start); + } + // Adjust border ranges and delete everything in-between. 
else { - let result = uninitialized_range.clone(); - self.uninitialized_ranges.remove(self.next_index); - Some(result) + let remove_start = if first_range.start >= self.drain_range.start { + self.first_index + } else { + first_range.end = self.drain_range.start; + self.first_index + 1 + }; + + let last_range = &mut self.uninitialized_ranges[self.next_index - 1]; + let remove_end = if last_range.end <= self.drain_range.end { + self.next_index + } else { + last_range.start = self.drain_range.end; + self.next_index - 1 + }; + + self.uninitialized_ranges.drain(remove_start..remove_end); } + + None } } } @@ -142,10 +157,12 @@ impl MemoryInitTracker { &'a mut self, drain_range: Range, ) -> MemoryInitTrackerDrain<'a> { + let index = self.lower_bound(drain_range.start); MemoryInitTrackerDrain { - next_index: self.lower_bound(drain_range.start), drain_range, uninitialized_ranges: &mut self.uninitialized_ranges, + first_index: index, + next_index: index, } } From 4f5acba5fb65371b62827add590fdc294cdec811 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 30 Jan 2021 22:24:11 +0100 Subject: [PATCH 17/18] =?UTF-8?q?Fixed=20too=20eager=20lock=20usage,=20fix?= =?UTF-8?q?ed=20O(N=C2=B2)=20buffer=20range=20collapse?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wgpu-core/src/device/queue.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index d89ce3bdf7..ebbec17a06 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -192,7 +192,7 @@ impl Global { let device = device_guard .get_mut(queue_id) .map_err(|_| DeviceError::Invalid)?; - let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { @@ -274,6 +274,9 @@ impl Global { // Ensure the overwritten bytes are marked as initialized 
so they don't need to be nulled prior to mapping or binding. { + drop(buffer_guard); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let dst = buffer_guard.get_mut(buffer_id).unwrap(); dst.initialization_status .clear(buffer_offset..(buffer_offset + data_size)); @@ -482,7 +485,6 @@ impl Global { let mut required_buffer_inits = { let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token); - let (mut buffer_guard, _) = hub.buffers.write(&mut token); let mut required_buffer_inits: FastHashMap< id::BufferId, @@ -494,6 +496,12 @@ impl Global { .get(cmb_id) .map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?; + if cmdbuf.buffer_memory_init_actions.len() == 0 { + continue; + } + + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + for buffer_use in cmdbuf.buffer_memory_init_actions.iter() { let buffer = buffer_guard .get_mut(buffer_use.id) @@ -543,7 +551,7 @@ impl Global { assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this! 
if ranges[i].start == ranges[i - 1].end { ranges[i - 1].end = ranges[i].end; - ranges.remove(i); + ranges.swap_remove(i); // Ordering not important at this point } } @@ -563,12 +571,17 @@ impl Global { ); } for range in ranges { + let size = range.end - range.start; + + assert!(range.start % 4 == 0, "Buffer {:?} has an uninitialized range with a start not aligned to 4 (start was {})", buffer, range.start); + assert!(size % 4 == 0, "Buffer {:?} has an uninitialized range with a size not aligned to 4 (size was {})", buffer, size); + unsafe { pending_writes_cmd_buf.fill_buffer( buffer_raw, hal::buffer::SubRange { offset: range.start, - size: Some(range.end - range.start), + size: Some(size), }, 0, ); From 822424e519b5b06bd70698eddbb88c8c75d53627 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 31 Jan 2021 22:01:52 +0100 Subject: [PATCH 18/18] Fix buffer init in bundle.rs with mem::size_of:: --- wgpu-core/src/command/bundle.rs | 41 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 9f847d86cd..f67fb61cb1 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -57,7 +57,7 @@ use crate::{ Label, LabelHelpers, LifeGuard, Stored, MAX_BIND_GROUPS, }; use arrayvec::ArrayVec; -use std::{borrow::Cow, iter, ops::Range}; +use std::{borrow::Cow, iter, mem, ops::Range}; use thiserror::Error; /// Describes a [`RenderBundleEncoder`]. 
@@ -393,7 +393,7 @@ impl RenderBundleEncoder { } RenderCommand::MultiDrawIndirect { buffer_id, - offset: _, + offset, count: None, indexed: false, } => { @@ -410,11 +410,18 @@ impl RenderBundleEncoder { check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) .map_pass_err(scope)?; - buffer_memory_init_actions.push(MemoryInitTrackerAction { - id: buffer_id, - range: 0..buffer.size, - kind: MemoryInitKind::NeedsInitializedMemory, - }); + buffer_memory_init_actions.extend( + buffer + .initialization_status + .check( + offset..(offset + mem::size_of::() as u64), + ) + .map(|range| MemoryInitTrackerAction { + id: buffer_id, + range, + kind: MemoryInitKind::NeedsInitializedMemory, + }), + ); commands.extend(state.flush_vertices()); commands.extend(state.flush_binds()); @@ -422,7 +429,7 @@ impl RenderBundleEncoder { } RenderCommand::MultiDrawIndirect { buffer_id, - offset: _, + offset, count: None, indexed: true, } => { @@ -440,12 +447,18 @@ impl RenderBundleEncoder { check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) .map_pass_err(scope)?; - let stride = 4 * 4; // 4 integers, vertexCount, instanceCount, firstVertex, firstInstance - buffer_memory_init_actions.push(MemoryInitTrackerAction { - id: buffer_id, - range: 0..stride, - kind: MemoryInitKind::NeedsInitializedMemory, - }); + buffer_memory_init_actions.extend( + buffer + .initialization_status + .check( + offset..(offset + mem::size_of::() as u64), + ) + .map(|range| MemoryInitTrackerAction { + id: buffer_id, + range, + kind: MemoryInitKind::NeedsInitializedMemory, + }), + ); commands.extend(state.index.flush()); commands.extend(state.flush_vertices());