1159: Zero initialize buffers r=kvark a=Wumpf

**Connections**
First half of  #563, focusing solely on buffers and ignoring same issue for textures

**Description**
Tracks for each buffer which parts are initialized (i.e. set to zero). Identified three interaction points for this:
* buffer mapping: Zeroing out ad-hoc and marking as initialized
* queue write to buffer: Marks target buffer regions as initialized (i.e. optimizes away buffer init down the line)
* use in binding or copy operation in a command buffer:
   * fine grained tracking of regions that may require init (or will be initialized implicitly) on each command buffer
   * set in motion on queue submit; actual initialization happens exclusively via `fill_buffer`

Todo list for Ready-to-Review

- [x] memory barriers for `fill_buffer` calls
- [x] better data structure for `memory_init_tracker`
- [x] coarse filtering on command-buffer buffer init requirements (the list should almost always be empty whereas now it pushes any buffer use)
- [x] improve naming of things
- [x] at least pretend this is adequately tested

Todo list beyond this PR

* make data structures usable for textures
  * and.. well.. implement all this for textures!
* explore reusing barrier tracker for memory init tracking?


**Testing**

* Some basic testing by doing some changes to wgpu-rs samples and watching them in the debugger.
* Added a ron test file for the player (love those!) to poke the feature a little bit
* MemoryInitTracker comes with simple unit tests

Overall this is a bit shallow, but, as so often in this area, accurate testing is hard because the outcomes are quite indirect.



Co-authored-by: Andreas Reich <r_andreas2@web.de>
This commit is contained in:
bors[bot]
2021-02-01 03:17:36 +00:00
committed by GitHub
14 changed files with 765 additions and 28 deletions

View File

@@ -2,6 +2,7 @@
backends: (bits: 0xF),
tests: [
"buffer-copy.ron",
"buffer-zero-init.ron",
"bind-group.ron",
"quad.ron",
],

View File

@@ -0,0 +1,70 @@
(
features: (bits: 0x0),
expectations: [
(
name: "mapped_at_creation: false, with MAP_WRITE",
buffer: (index: 0, epoch: 1),
offset: 0,
data: Raw([0x00, 0x00, 0x00, 0x00]),
),
(
name: "mapped_at_creation: false, without MAP_WRITE",
buffer: (index: 1, epoch: 1),
offset: 0,
data: Raw([0x00, 0x00, 0x00, 0x00]),
),
(
name: "partially written buffer",
buffer: (index: 2, epoch: 1),
offset: 0,
data: Raw([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00]),
),
],
actions: [
CreateBuffer(
Id(0, 1, Empty),
(
label: Some("mapped_at_creation: false, with MAP_WRITE"),
size: 16,
usage: (
bits: 131, // STORAGE + MAP_READ + MAP_WRITE
),
mapped_at_creation: false,
),
),
CreateBuffer(
Id(1, 1, Empty),
(
label: Some("mapped_at_creation: false, without MAP_WRITE"),
size: 16,
usage: (
bits: 129, // STORAGE + MAP_READ
),
mapped_at_creation: false,
),
),
CreateBuffer(
Id(2, 1, Empty),
(
label: Some("partially written"),
size: 24,
usage: (
bits: 9, // MAP_READ + COPY_DST
),
mapped_at_creation: false,
),
),
WriteBuffer(
id: Id(2, 1, Empty),
data: "data1.bin",
range: (
start: 4,
end: 20,
),
queued: true,
),
// TODO: Add a buffer with `mapped_at_creation: true`. As of writing there's no unmap action we could insert here.
Submit(1, []),
],
)

View File

@@ -9,6 +9,7 @@ use crate::{
},
hub::Resource,
id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureViewId, Valid},
memory_init_tracker::MemoryInitTrackerAction,
track::{TrackerSet, DUMMY_SELECTOR},
validation::{MissingBufferUsageError, MissingTextureUsageError},
FastHashMap, Label, LifeGuard, MultiRefCount, Stored, MAX_BIND_GROUPS,
@@ -588,6 +589,7 @@ pub struct BindGroup<B: hal::Backend> {
pub(crate) layout_id: Valid<BindGroupLayoutId>,
pub(crate) life_guard: LifeGuard,
pub(crate) used: TrackerSet,
pub(crate) used_buffer_ranges: Vec<MemoryInitTrackerAction<BufferId>>,
pub(crate) dynamic_binding_info: Vec<BindGroupDynamicBindingData>,
}

View File

@@ -123,6 +123,7 @@ impl<B: GfxBackend> CommandAllocator<B> {
device_id,
trackers: TrackerSet::new(B::VARIANT),
used_swap_chains: Default::default(),
buffer_memory_init_actions: Default::default(),
limits,
private_features,
has_labels: label.is_some(),

View File

@@ -49,6 +49,7 @@ use crate::{
},
hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token},
id,
memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction},
resource::BufferUse,
span,
track::{TrackerSet, UsageConflict},
@@ -56,7 +57,7 @@ use crate::{
Label, LabelHelpers, LifeGuard, Stored, MAX_BIND_GROUPS,
};
use arrayvec::ArrayVec;
use std::{borrow::Cow, iter, ops::Range};
use std::{borrow::Cow, iter, mem, ops::Range};
use thiserror::Error;
/// Describes a [`RenderBundleEncoder`].
@@ -159,6 +160,7 @@ impl RenderBundleEncoder {
let mut commands = Vec::new();
let mut base = self.base.as_ref();
let mut pipeline_layout_id = None::<id::Valid<id::PipelineLayoutId>>;
let mut buffer_memory_init_actions = Vec::new();
for &command in base.commands {
match command {
@@ -204,6 +206,8 @@ impl RenderBundleEncoder {
.map_pass_err(scope);
}
buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges);
state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets);
state
.trackers
@@ -262,6 +266,11 @@ impl RenderBundleEncoder {
Some(s) => offset + s.get(),
None => buffer.size,
};
buffer_memory_init_actions.push(MemoryInitTrackerAction {
id: buffer_id,
range: offset..end,
kind: MemoryInitKind::NeedsInitializedMemory,
});
state.index.set_format(index_format);
state.index.set_buffer(buffer_id, offset..end);
}
@@ -284,6 +293,11 @@ impl RenderBundleEncoder {
Some(s) => offset + s.get(),
None => buffer.size,
};
buffer_memory_init_actions.push(MemoryInitTrackerAction {
id: buffer_id,
range: offset..end,
kind: MemoryInitKind::NeedsInitializedMemory,
});
state.vertex[slot as usize].set_buffer(buffer_id, offset..end);
}
RenderCommand::SetPushConstant {
@@ -379,7 +393,7 @@ impl RenderBundleEncoder {
}
RenderCommand::MultiDrawIndirect {
buffer_id,
offset: _,
offset,
count: None,
indexed: false,
} => {
@@ -396,13 +410,26 @@ impl RenderBundleEncoder {
check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT)
.map_pass_err(scope)?;
buffer_memory_init_actions.extend(
buffer
.initialization_status
.check(
offset..(offset + mem::size_of::<wgt::DrawIndirectArgs>() as u64),
)
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
commands.extend(state.flush_vertices());
commands.extend(state.flush_binds());
commands.push(command);
}
RenderCommand::MultiDrawIndirect {
buffer_id,
offset: _,
offset,
count: None,
indexed: true,
} => {
@@ -420,6 +447,19 @@ impl RenderBundleEncoder {
check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT)
.map_pass_err(scope)?;
buffer_memory_init_actions.extend(
buffer
.initialization_status
.check(
offset..(offset + mem::size_of::<wgt::DrawIndirectArgs>() as u64),
)
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
commands.extend(state.index.flush());
commands.extend(state.flush_vertices());
commands.extend(state.flush_binds());
@@ -454,6 +494,7 @@ impl RenderBundleEncoder {
ref_count: device.life_guard.add_ref(),
},
used: state.trackers,
buffer_memory_init_actions,
context: self.context,
life_guard: LifeGuard::new(desc.label.borrow_or_default()),
})
@@ -502,6 +543,7 @@ pub struct RenderBundle {
base: BasePass<RenderCommand>,
pub(crate) device_id: Stored<id::DeviceId>,
pub(crate) used: TrackerSet,
pub(crate) buffer_memory_init_actions: Vec<MemoryInitTrackerAction<id::BufferId>>,
pub(crate) context: RenderPassContext,
pub(crate) life_guard: LifeGuard,
}

View File

@@ -11,6 +11,7 @@ use crate::{
},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id,
memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction},
resource::{Buffer, BufferUse, Texture},
span,
track::{TrackerSet, UsageConflict},
@@ -146,6 +147,8 @@ pub enum ComputePassErrorInner {
end_offset: u64,
buffer_size: u64,
},
#[error("buffer {0:?} is invalid or destroyed")]
InvalidBuffer(id::BufferId),
#[error(transparent)]
ResourceUsageConflict(#[from] UsageConflict),
#[error(transparent)]
@@ -329,6 +332,22 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.validate_dynamic_bindings(&temp_offsets)
.map_pass_err(scope)?;
cmd_buf.buffer_memory_init_actions.extend(
bind_group.used_buffer_ranges.iter().filter_map(
|action| match buffer_guard.get(action.id) {
Ok(buffer) => buffer
.initialization_status
.check(action.range.clone())
.map(|range| MemoryInitTrackerAction {
id: action.id,
range,
kind: action.kind,
}),
Err(_) => None,
},
),
);
if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry(
index as usize,
id::Valid(bind_group_id),
@@ -513,6 +532,19 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id))
.map_pass_err(scope)?;
let stride = 3 * 4; // 3 integers, x/y/z group size
cmd_buf.buffer_memory_init_actions.extend(
indirect_buffer
.initialization_status
.check(offset..(offset + stride))
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
state
.flush_states(
raw,

View File

@@ -24,6 +24,7 @@ use crate::{
device::{all_buffer_stages, all_image_stages},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id,
memory_init_tracker::MemoryInitTrackerAction,
resource::{Buffer, Texture},
span,
track::TrackerSet,
@@ -46,6 +47,7 @@ pub struct CommandBuffer<B: hal::Backend> {
pub(crate) device_id: Stored<id::DeviceId>,
pub(crate) trackers: TrackerSet,
pub(crate) used_swap_chains: SmallVec<[Stored<id::SwapChainId>; 1]>,
pub(crate) buffer_memory_init_actions: Vec<MemoryInitTrackerAction<id::BufferId>>,
limits: wgt::Limits,
private_features: PrivateFeatures,
has_labels: bool,
@@ -56,17 +58,6 @@ pub struct CommandBuffer<B: hal::Backend> {
}
impl<B: GfxBackend> CommandBuffer<B> {
fn get_encoder(
storage: &Storage<Self, id::CommandEncoderId>,
id: id::CommandEncoderId,
) -> Result<&Self, CommandEncoderError> {
match storage.get(id) {
Ok(cmd_buf) if cmd_buf.is_recording => Ok(cmd_buf),
Ok(_) => Err(CommandEncoderError::NotRecording),
Err(_) => Err(CommandEncoderError::Invalid),
}
}
fn get_encoder_mut(
storage: &mut Storage<Self, id::CommandEncoderId>,
id: id::CommandEncoderId,

View File

@@ -17,6 +17,7 @@ use crate::{
},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id,
memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction},
pipeline::PipelineFlags,
resource::{BufferUse, Texture, TextureUse, TextureView, TextureViewInner},
span,
@@ -1026,10 +1027,10 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (cmd_buf_raw, trackers, used_swapchain, query_reset_state) = {
// read-only lock guard
let (cmb_guard, mut token) = hub.command_buffers.read(&mut token);
let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token);
let cmd_buf =
CommandBuffer::get_encoder(&*cmb_guard, encoder_id).map_pass_err(scope)?;
CommandBuffer::get_encoder_mut(&mut *cmb_guard, encoder_id).map_pass_err(scope)?;
let device = &device_guard[cmd_buf.device_id.value];
let mut raw = device.cmd_allocator.extend(cmd_buf);
unsafe {
@@ -1117,6 +1118,22 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.merge_extend(&bind_group.used)
.map_pass_err(scope)?;
cmd_buf.buffer_memory_init_actions.extend(
bind_group.used_buffer_ranges.iter().filter_map(|action| {
match buffer_guard.get(action.id) {
Ok(buffer) => buffer
.initialization_status
.check(action.range.clone())
.map(|range| MemoryInitTrackerAction {
id: action.id,
range,
kind: action.kind,
}),
Err(_) => None,
}
}),
);
if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry(
index as usize,
id::Valid(bind_group_id),
@@ -1293,6 +1310,17 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
state.index.format = Some(index_format);
state.index.update_limit();
cmd_buf.buffer_memory_init_actions.extend(
buffer
.initialization_status
.check(offset..end)
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
let range = hal::buffer::SubRange {
offset,
size: Some(end - offset),
@@ -1336,6 +1364,17 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
};
vertex_state.bound = true;
cmd_buf.buffer_memory_init_actions.extend(
buffer
.initialization_status
.check(offset..(offset + vertex_state.total_size))
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
let range = hal::buffer::SubRange {
offset,
size: size.map(|s| s.get()),
@@ -1588,6 +1627,17 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.map_pass_err(scope);
}
cmd_buf.buffer_memory_init_actions.extend(
indirect_buffer
.initialization_status
.check(offset..end_offset)
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
match indexed {
false => unsafe {
raw.draw_indirect(
@@ -1671,6 +1721,16 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
})
.map_pass_err(scope);
}
cmd_buf.buffer_memory_init_actions.extend(
indirect_buffer
.initialization_status
.check(offset..end_offset)
.map(|range| MemoryInitTrackerAction {
id: buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
let begin_count_offset = count_buffer_offset;
let end_count_offset = count_buffer_offset + 4;
@@ -1682,6 +1742,16 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
})
.map_pass_err(scope);
}
cmd_buf.buffer_memory_init_actions.extend(
count_buffer
.initialization_status
.check(count_buffer_offset..end_count_offset)
.map(|range| MemoryInitTrackerAction {
id: count_buffer_id,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
match indexed {
false => unsafe {
@@ -1815,6 +1885,23 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.map_err(RenderPassErrorInner::IncompatibleRenderBundle)
.map_pass_err(scope)?;
cmd_buf.buffer_memory_init_actions.extend(
bundle
.buffer_memory_init_actions
.iter()
.filter_map(|action| match buffer_guard.get(action.id) {
Ok(buffer) => buffer
.initialization_status
.check(action.range.clone())
.map(|range| MemoryInitTrackerAction {
id: action.id,
range,
kind: action.kind,
}),
Err(_) => None,
}),
);
unsafe {
bundle.execute(
&mut raw,

View File

@@ -10,6 +10,7 @@ use crate::{
device::{all_buffer_stages, all_image_stages},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id::{BufferId, CommandEncoderId, TextureId},
memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction},
resource::{BufferUse, Texture, TextureErrorDimension, TextureUse},
span,
track::TextureSelector,
@@ -149,6 +150,7 @@ pub(crate) fn texture_copy_view_to_hal<B: hal::Backend>(
}
/// Function copied with minor modifications from webgpu standard https://gpuweb.github.io/gpuweb/#valid-texture-copy-range
/// If successful, returns number of buffer bytes required for this copy.
pub(crate) fn validate_linear_texture_data(
layout: &wgt::TextureDataLayout,
format: wgt::TextureFormat,
@@ -156,7 +158,7 @@ pub(crate) fn validate_linear_texture_data(
buffer_side: CopySide,
bytes_per_block: BufferAddress,
copy_size: &Extent3d,
) -> Result<(), TransferError> {
) -> Result<BufferAddress, TransferError> {
// Convert all inputs to BufferAddress (u64) to prevent overflow issues
let copy_width = copy_size.width as BufferAddress;
let copy_height = copy_size.height as BufferAddress;
@@ -217,7 +219,7 @@ pub(crate) fn validate_linear_texture_data(
if copy_depth > 1 && rows_per_image == 0 {
return Err(TransferError::InvalidRowsPerImage);
}
Ok(())
Ok(required_bytes_in_copy)
}
/// Function copied with minor modifications from webgpu standard https://gpuweb.github.io/gpuweb/#valid-texture-copy-range
@@ -401,6 +403,28 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
return Ok(());
}
// Make sure source is initialized memory and mark dest as initialized.
cmd_buf.buffer_memory_init_actions.extend(
dst_buffer
.initialization_status
.check(destination_offset..(destination_offset + size))
.map(|range| MemoryInitTrackerAction {
id: destination,
range,
kind: MemoryInitKind::ImplicitlyInitialized,
}),
);
cmd_buf.buffer_memory_init_actions.extend(
src_buffer
.initialization_status
.check(source_offset..(source_offset + size))
.map(|range| MemoryInitTrackerAction {
id: source,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
let region = hal::command::BufferCopy {
src: source_offset,
dst: destination_offset,
@@ -505,7 +529,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
CopySide::Destination,
copy_size,
)?;
validate_linear_texture_data(
let required_buffer_bytes_in_copy = validate_linear_texture_data(
&source.layout,
dst_texture.format,
src_buffer.size,
@@ -514,6 +538,17 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
copy_size,
)?;
cmd_buf.buffer_memory_init_actions.extend(
src_buffer
.initialization_status
.check(source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy))
.map(|range| MemoryInitTrackerAction {
id: source.buffer,
range,
kind: MemoryInitKind::NeedsInitializedMemory,
}),
);
let (block_width, _) = dst_texture.format.describe().block_dimensions;
if !conv::is_valid_copy_dst_texture_format(dst_texture.format) {
Err(TransferError::CopyToForbiddenTextureFormat(
@@ -645,7 +680,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
CopySide::Source,
copy_size,
)?;
validate_linear_texture_data(
let required_buffer_bytes_in_copy = validate_linear_texture_data(
&destination.layout,
src_texture.format,
dst_buffer.size,
@@ -661,6 +696,20 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
))?
}
cmd_buf.buffer_memory_init_actions.extend(
dst_buffer
.initialization_status
.check(
destination.layout.offset
..(destination.layout.offset + required_buffer_bytes_in_copy),
)
.map(|range| MemoryInitTrackerAction {
id: destination.buffer,
range,
kind: MemoryInitKind::ImplicitlyInitialized,
}),
);
// WebGPU uses the physical size of the texture for copies whereas vulkan uses
// the virtual size. We have passed validation, so it's safe to use the
// image extent data directly. We want the provided copy size to be no larger than

View File

@@ -8,7 +8,9 @@ use crate::{
hub::{
GfxBackend, Global, GlobalIdentityHandlerFactory, Hub, Input, InvalidId, Storage, Token,
},
id, instance, pipeline, resource, span, swap_chain,
id, instance,
memory_init_tracker::{MemoryInitKind, MemoryInitTracker, MemoryInitTrackerAction},
pipeline, resource, span, swap_chain,
track::{BufferState, TextureSelector, TextureState, TrackerSet},
validation::{self, check_buffer_usage, check_texture_usage},
FastHashMap, FastHashSet, Label, LabelHelpers, LifeGuard, MultiRefCount, PrivateFeatures,
@@ -196,6 +198,30 @@ fn map_buffer<B: hal::Backend>(
}),
_ => None,
};
// Zero out uninitialized parts of the mapping. (Spec dictates all resources behave as if they were initialized with zero)
//
// If this is a read mapping, ideally we would use a `fill_buffer` command before reading the data from GPU (i.e. `invalidate_range`).
// However, this would require us to kick off and wait for a command buffer or piggy back on an existing one (the later is likely the only worthwhile option).
// As reading uninitialized memory isn't a particular important path to support,
// we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region.
//
// If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways.
let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways.
for uninitialized_range in buffer.initialization_status.drain(offset..(size + offset)) {
let num_bytes = uninitialized_range.end - uninitialized_range.start;
unsafe {
ptr::write_bytes(
ptr.as_ptr().offset(uninitialized_range.start as isize),
0,
num_bytes as usize,
)
};
if zero_init_needs_flush_now {
block.flush_range(raw, uninitialized_range.start, Some(num_bytes))?;
}
}
Ok(ptr)
}
@@ -470,6 +496,10 @@ impl<B: GfxBackend> Device<B> {
// we are going to be copying into it, internally
usage |= hal::buffer::Usage::TRANSFER_DST;
}
} else {
// We are required to zero out (initialize) all memory.
// This is done on demand using fill_buffer which requires write transfer usage!
usage |= hal::buffer::Usage::TRANSFER_DST;
}
if desc.usage.is_empty() {
@@ -542,7 +572,7 @@ impl<B: GfxBackend> Device<B> {
},
usage: desc.usage,
size: desc.size,
full_range: (),
initialization_status: MemoryInitTracker::new(desc.size),
sync_mapped_writes: None,
map_state: resource::BufferMapState::Idle,
life_guard: LifeGuard::new(desc.label.borrow_or_default()),
@@ -1237,6 +1267,7 @@ impl<B: GfxBackend> Device<B> {
// `BTreeMap` has ordered bindings as keys, which allows us to coalesce
// the descriptor writes into a single transaction.
let mut write_map = BTreeMap::new();
let mut used_buffer_ranges = Vec::new();
for entry in desc.entries.iter() {
let binding = entry.binding;
// Find the corresponding declaration in the layout
@@ -1329,6 +1360,12 @@ impl<B: GfxBackend> Device<B> {
return Err(Error::BindingZeroSize(bb.buffer_id));
}
used_buffer_ranges.push(MemoryInitTrackerAction {
id: bb.buffer_id,
range: bb.offset..(bb.offset + bind_size),
kind: MemoryInitKind::NeedsInitializedMemory,
});
let sub_range = hal::buffer::SubRange {
offset: bb.offset,
size: Some(bind_size),
@@ -1544,6 +1581,7 @@ impl<B: GfxBackend> Device<B> {
layout_id: id::Valid(desc.layout),
life_guard: LifeGuard::new(desc.label.borrow_or_default()),
used,
used_buffer_ranges,
dynamic_binding_info,
})
}
@@ -2530,7 +2568,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
usage: wgt::BufferUsage::MAP_WRITE | wgt::BufferUsage::COPY_SRC,
mapped_at_creation: false,
};
let stage = match device.create_buffer(device_id, &stage_desc, true) {
let mut stage = match device.create_buffer(device_id, &stage_desc, true) {
Ok(stage) => stage,
Err(e) => {
let (raw, memory) = buffer.raw.unwrap();
@@ -2561,6 +2599,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
break e.into();
}
};
// Zero initialize memory and then mark both staging and buffer as initialized
// (it's guaranteed that this is the case by the time the buffer is usable)
unsafe { ptr::write_bytes(ptr.as_ptr(), 0, buffer.size as usize) };
buffer.initialization_status.clear(0..buffer.size);
stage.initialization_status.clear(0..buffer.size);
buffer.map_state = resource::BufferMapState::Init {
ptr,
needs_flush: !stage_memory.is_coherent(),

View File

@@ -13,13 +13,14 @@ use crate::{
device::{alloc, DeviceError, WaitIdleError},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token},
id,
memory_init_tracker::MemoryInitKind,
resource::{BufferAccessError, BufferMapState, BufferUse, TextureUse},
span, FastHashSet,
span, FastHashMap, FastHashSet,
};
use hal::{command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _};
use smallvec::SmallVec;
use std::{iter, ptr};
use std::{iter, ops::Range, ptr};
use thiserror::Error;
struct StagingData<B: hal::Backend> {
@@ -271,6 +272,16 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
device.pending_writes.consume(stage);
device.pending_writes.dst_buffers.insert(buffer_id);
// Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding.
{
drop(buffer_guard);
let (mut buffer_guard, _) = hub.buffers.write(&mut token);
let dst = buffer_guard.get_mut(buffer_id).unwrap();
dst.initialization_status
.clear(buffer_offset..(buffer_offset + data_size));
}
Ok(())
}
@@ -458,6 +469,129 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
Ok(())
}
// Enacts all zero initializations required by the given command buffers
// Required commands are appended to device.pending_writes
fn initialize_used_uninitialized_memory<B: GfxBackend>(
&self,
queue_id: id::QueueId,
command_buffer_ids: &[id::CommandBufferId],
) -> Result<(), QueueSubmitError> {
if command_buffer_ids.is_empty() {
return Ok(());
}
let hub = B::hub(self);
let mut token = Token::root();
let mut required_buffer_inits = {
let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token);
let mut required_buffer_inits: FastHashMap<
id::BufferId,
Vec<Range<wgt::BufferAddress>>,
> = FastHashMap::default();
for &cmb_id in command_buffer_ids {
let cmdbuf = command_buffer_guard
.get(cmb_id)
.map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?;
if cmdbuf.buffer_memory_init_actions.len() == 0 {
continue;
}
let (mut buffer_guard, _) = hub.buffers.write(&mut token);
for buffer_use in cmdbuf.buffer_memory_init_actions.iter() {
let buffer = buffer_guard
.get_mut(buffer_use.id)
.map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?;
let uninitialized_ranges =
buffer.initialization_status.drain(buffer_use.range.clone());
match buffer_use.kind {
MemoryInitKind::ImplicitlyInitialized => {
uninitialized_ranges.for_each(drop);
}
MemoryInitKind::NeedsInitializedMemory => {
required_buffer_inits
.entry(buffer_use.id)
.or_default()
.extend(uninitialized_ranges);
}
}
}
}
required_buffer_inits
};
// Memory init is expected to be rare (means user relies on default zero!), so most of the time we early here!
if required_buffer_inits.is_empty() {
return Ok(());
}
let (mut device_guard, mut token) = hub.devices.write(&mut token);
let (buffer_guard, _) = hub.buffers.read(&mut token);
let device = device_guard
.get_mut(queue_id)
.map_err(|_| DeviceError::Invalid)?;
device
.pending_writes
.dst_buffers
.extend(required_buffer_inits.keys());
device.borrow_pending_writes(); // Call ensures there is a pending_writes cmdbuffer, but using the reference returned would make the borrow checker unhappy!
let pending_writes_cmd_buf = device.pending_writes.command_buffer.as_mut().unwrap();
let mut trackers = device.trackers.lock();
for (buffer_id, mut ranges) in required_buffer_inits.drain() {
// Collapse touching ranges. We can't do this any earlier since we only now gathered ranges from several different command buffers!
ranges.sort_by(|a, b| a.start.cmp(&b.start));
for i in (1..ranges.len()).rev() {
assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this!
if ranges[i].start == ranges[i - 1].end {
ranges[i - 1].end = ranges[i].end;
ranges.swap_remove(i); // Ordering not important at this point
}
}
let (buffer, transition) = trackers
.buffers
.use_replace(&*buffer_guard, buffer_id, (), BufferUse::COPY_DST)
.map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_id))?;
let &(ref buffer_raw, _) = buffer
.raw
.as_ref()
.ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))?;
unsafe {
pending_writes_cmd_buf.pipeline_barrier(
super::all_buffer_stages()..hal::pso::PipelineStage::TRANSFER,
hal::memory::Dependencies::empty(),
transition.map(|pending| pending.into_hal(buffer)),
);
}
for range in ranges {
let size = range.end - range.start;
assert!(range.start % 4 == 0, "Buffer {:?} has an uninitialized range with a start not aligned to 4 (start was {})", buffer, range.start);
assert!(size % 4 == 0, "Buffer {:?} has an uninitialized range with a size not aligned to 4 (size was {})", buffer, size);
unsafe {
pending_writes_cmd_buf.fill_buffer(
buffer_raw,
hal::buffer::SubRange {
offset: range.start,
size: Some(size),
},
0,
);
}
}
}
Ok(())
}
pub fn queue_submit<B: GfxBackend>(
&self,
queue_id: id::QueueId,
@@ -465,10 +599,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
) -> Result<(), QueueSubmitError> {
span!(_guard, INFO, "Queue::submit");
self.initialize_used_uninitialized_memory::<B>(queue_id, command_buffer_ids)?;
let hub = B::hub(self);
let mut token = Token::root();
let callbacks = {
let mut token = Token::root();
let (mut device_guard, mut token) = hub.devices.write(&mut token);
let device = device_guard
.get_mut(queue_id)
@@ -490,7 +626,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token);
let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token);
let (mut buffer_guard, mut token) = hub.buffers.write(&mut token);
let (texture_guard, mut token) = hub.textures.read(&mut token);
let (texture_guard, mut token) = hub.textures.write(&mut token);
let (texture_view_guard, mut token) = hub.texture_views.read(&mut token);
let (sampler_guard, _) = hub.samplers.read(&mut token);

View File

@@ -36,6 +36,7 @@ pub mod device;
pub mod hub;
pub mod id;
pub mod instance;
mod memory_init_tracker;
pub mod pipeline;
pub mod resource;
pub mod swap_chain;

View File

@@ -0,0 +1,279 @@
use std::ops::Range;
/// How a tracked memory range is about to be used, which determines whether the
/// range still has to be zero-initialized before that use.
#[derive(Debug, Clone, Copy)]
pub(crate) enum MemoryInitKind {
    /// The memory range is going to be written by an already initialized source,
    /// thus doesn't need extra attention other than marking as initialized.
    ImplicitlyInitialized,
    /// The memory range is going to be read, therefore needs to ensure prior initialization.
    NeedsInitializedMemory,
}
/// A pending memory-initialization requirement on a range of a resource,
/// recorded during command encoding/binding and resolved against the
/// resource's `MemoryInitTracker` (e.g. on queue submit).
#[derive(Debug, Clone)]
pub(crate) struct MemoryInitTrackerAction<ResourceId> {
    /// Id of the resource whose memory range is affected.
    pub(crate) id: ResourceId,
    /// Byte range of the resource covered by this action.
    pub(crate) range: Range<wgt::BufferAddress>,
    /// Whether the range needs prior initialization or is initialized by the use itself.
    pub(crate) kind: MemoryInitKind,
}
/// Tracks the initialization status of a linear memory range from `0..size`.
///
/// A freshly constructed tracker considers the entire range uninitialized;
/// ranges are removed from the tracked set as they get cleared/drained.
#[derive(Debug)]
pub(crate) struct MemoryInitTracker {
    /// Ordered (ascending by start), non-overlapping list of all uninitialized ranges.
    uninitialized_ranges: Vec<Range<wgt::BufferAddress>>,
}
/// Iterator returned by `MemoryInitTracker::drain`.
///
/// Yields the uninitialized sub-ranges overlapping `drain_range` (clamped to
/// it). Once iteration is exhausted, the tracker's range list is updated so the
/// drained window counts as initialized — the iterator must therefore be fully
/// consumed for the drain to take effect.
pub(crate) struct MemoryInitTrackerDrain<'a> {
    /// Borrowed list of uninitialized ranges; mutated when iteration finishes.
    uninitialized_ranges: &'a mut Vec<Range<wgt::BufferAddress>>,
    /// The window being drained (i.e. marked as initialized).
    drain_range: Range<wgt::BufferAddress>,
    /// Index of the first tracked range that may overlap `drain_range`.
    first_index: usize,
    /// Index of the next tracked range to visit.
    next_index: usize,
}
impl<'a> Iterator for MemoryInitTrackerDrain<'a> {
    type Item = Range<wgt::BufferAddress>;

    fn next(&mut self) -> Option<Self::Item> {
        // Yield the next uninitialized range that still starts within the drain
        // window, clamped to the window's bounds.
        if let Some(r) = self
            .uninitialized_ranges
            .get(self.next_index)
            .and_then(|range| {
                if range.start < self.drain_range.end {
                    Some(range.clone())
                } else {
                    None
                }
            })
        {
            self.next_index += 1;
            Some(r.start.max(self.drain_range.start)..r.end.min(self.drain_range.end))
        } else {
            // Iteration is exhausted: now patch the tracker so the drained
            // window counts as initialized.
            // NOTE(review): polling `next` again after it has returned `None`
            // would repeat this cleanup with stale indices — callers must not
            // poll past `None` (for/for_each/extend are fine). TODO confirm.
            let num_affected = self.next_index - self.first_index;
            if num_affected == 0 {
                return None;
            }
            let first_range = &mut self.uninitialized_ranges[self.first_index];

            // Split one "big" uninitialized range?
            // (The drain window lies strictly inside a single tracked range, so
            // that range is split into a head and a tail.)
            if num_affected == 1
                && first_range.start < self.drain_range.start
                && first_range.end > self.drain_range.end
            {
                let old_start = first_range.start;
                first_range.start = self.drain_range.end;
                self.uninitialized_ranges
                    .insert(self.first_index, old_start..self.drain_range.start);
            }
            // Adjust border ranges and delete everything in-between.
            else {
                let remove_start = if first_range.start >= self.drain_range.start {
                    self.first_index
                } else {
                    // First range begins before the window: keep its head.
                    first_range.end = self.drain_range.start;
                    self.first_index + 1
                };

                let last_range = &mut self.uninitialized_ranges[self.next_index - 1];
                let remove_end = if last_range.end <= self.drain_range.end {
                    self.next_index
                } else {
                    // Last range extends past the window: keep its tail.
                    last_range.start = self.drain_range.end;
                    self.next_index - 1
                };

                self.uninitialized_ranges.drain(remove_start..remove_end);
            }

            None
        }
    }
}
impl MemoryInitTracker {
    /// Creates a tracker that regards the whole `0..size` range as uninitialized.
    pub(crate) fn new(size: wgt::BufferAddress) -> Self {
        Self {
            uninitialized_ranges: vec![0..size],
        }
    }

    /// Finds the index of the first range whose `end` is bigger than `bound`,
    /// in O(log n) (with n being the number of uninitialized ranges).
    ///
    /// Equivalent to
    /// `self.uninitialized_ranges.iter().position(|r| r.end > bound)`,
    /// except that it returns `self.uninitialized_ranges.len()` instead of
    /// `None` when no such range exists.
    /// In future Rust versions this operation can be done with `partition_point`,
    /// see https://github.com/rust-lang/rust/pull/73577/
    fn lower_bound(&self, bound: wgt::BufferAddress) -> usize {
        let mut left = 0;
        let mut right = self.uninitialized_ranges.len();
        while left != right {
            let mid = left + (right - left) / 2;
            // `mid` lies within `left..right` and thus within bounds, so safe
            // indexing suffices here (no need for `get_unchecked`).
            if self.uninitialized_ranges[mid].end <= bound {
                left = mid + 1;
            } else {
                right = mid;
            }
        }
        left
    }

    /// Checks if there are any uninitialized ranges within `query_range`.
    ///
    /// If there are any, the returned range is the subrange of `query_range`
    /// that contains all these uninitialized regions.
    /// The returned range may be larger than necessary (tradeoff for making
    /// this function O(log n)).
    pub(crate) fn check(
        &self,
        query_range: Range<wgt::BufferAddress>,
    ) -> Option<Range<wgt::BufferAddress>> {
        let index = self.lower_bound(query_range.start);
        self.uninitialized_ranges
            .get(index)
            .and_then(|start_range| {
                if start_range.start < query_range.end {
                    let start = start_range.start.max(query_range.start);
                    match self.uninitialized_ranges.get(index + 1) {
                        // Would need to keep iterating for a more accurate upper
                        // bound. Don't do that here; return a conservative range.
                        Some(next_range) if next_range.start < query_range.end => {
                            Some(start..query_range.end)
                        }
                        _ => Some(start..start_range.end.min(query_range.end)),
                    }
                } else {
                    None
                }
            })
    }

    /// Drains uninitialized ranges within `drain_range`.
    ///
    /// The returned iterator yields the previously uninitialized sub-ranges
    /// (clamped to `drain_range`); the tracker is only updated once the
    /// iterator has been fully consumed.
    #[must_use]
    pub(crate) fn drain<'a>(
        &'a mut self,
        drain_range: Range<wgt::BufferAddress>,
    ) -> MemoryInitTrackerDrain<'a> {
        let index = self.lower_bound(drain_range.start);
        MemoryInitTrackerDrain {
            drain_range,
            uninitialized_ranges: &mut self.uninitialized_ranges,
            first_index: index,
            next_index: index,
        }
    }

    /// Marks all uninitialized ranges within `range` as initialized.
    pub(crate) fn clear(&mut self, range: Range<wgt::BufferAddress>) {
        self.drain(range).for_each(drop);
    }
}
#[cfg(test)]
mod test {
    // Unit tests for `MemoryInitTracker`: exercise `check` (query), `clear`
    // (mark initialized) and `drain` (mark initialized while yielding the
    // previously-uninitialized subranges).
    use super::MemoryInitTracker;
    use std::ops::Range;
    #[test]
    fn check_for_newly_created_tracker() {
        // A fresh tracker reports the entire buffer (and any subrange of it)
        // as uninitialized.
        let tracker = MemoryInitTracker::new(10);
        assert_eq!(tracker.check(0..10), Some(0..10));
        assert_eq!(tracker.check(0..3), Some(0..3));
        assert_eq!(tracker.check(3..4), Some(3..4));
        assert_eq!(tracker.check(4..10), Some(4..10));
    }
    #[test]
    fn check_for_cleared_tracker() {
        // After clearing the whole buffer, no query reports uninitialized memory.
        let mut tracker = MemoryInitTracker::new(10);
        tracker.clear(0..10);
        assert_eq!(tracker.check(0..10), None);
        assert_eq!(tracker.check(0..3), None);
        assert_eq!(tracker.check(3..4), None);
        assert_eq!(tracker.check(4..10), None);
    }
    #[test]
    fn check_for_partially_filled_tracker() {
        let mut tracker = MemoryInitTracker::new(25);
        // Two regions of uninitialized memory remain: 5..10 and 15..20.
        tracker.clear(0..5);
        tracker.clear(10..15);
        tracker.clear(20..25);
        assert_eq!(tracker.check(0..25), Some(5..25)); // entire range
        assert_eq!(tracker.check(0..5), None); // left non-overlapping
        assert_eq!(tracker.check(3..8), Some(5..8)); // left overlapping region
        assert_eq!(tracker.check(3..17), Some(5..17)); // left overlapping region + contained region
        // `check` trades accuracy for O(log n): when more than one
        // uninitialized range overlaps the query it returns the query's own
        // end instead of tightening it.
        assert_eq!(tracker.check(8..22), Some(8..22)); // right overlapping region + contained region (yes, doesn't fix range end!)
        assert_eq!(tracker.check(17..22), Some(17..20)); // right overlapping region
        assert_eq!(tracker.check(20..25), None); // right non-overlapping
    }
    #[test]
    fn clear_already_cleared() {
        // Clearing ranges that partially or fully overlap already-cleared
        // memory must be harmless and still converge to fully initialized.
        let mut tracker = MemoryInitTracker::new(30);
        tracker.clear(10..20);
        // Overlapping with non-cleared
        tracker.clear(5..15); // Left overlap
        tracker.clear(15..25); // Right overlap
        tracker.clear(0..30); // Inner overlap
        // Clear fully cleared
        tracker.clear(0..30);
        assert_eq!(tracker.check(0..30), None);
    }
    #[test]
    fn drain_never_returns_ranges_twice_for_same_range() {
        // Draining marks memory as initialized, so a repeated drain over the
        // same range yields nothing.
        let mut tracker = MemoryInitTracker::new(19);
        assert_eq!(tracker.drain(0..19).count(), 1);
        assert_eq!(tracker.drain(0..19).count(), 0);
        let mut tracker = MemoryInitTracker::new(17);
        assert_eq!(tracker.drain(5..8).count(), 1);
        assert_eq!(tracker.drain(5..8).count(), 0);
        assert_eq!(tracker.drain(1..3).count(), 1);
        assert_eq!(tracker.drain(1..3).count(), 0);
        assert_eq!(tracker.drain(7..13).count(), 1);
        assert_eq!(tracker.drain(7..13).count(), 0);
    }
    #[test]
    fn drain_splits_ranges_correctly() {
        let mut tracker = MemoryInitTracker::new(1337);
        // Draining the middle of an uninitialized region yields exactly the
        // drained subrange...
        assert_eq!(
            tracker
                .drain(21..42)
                .collect::<Vec<Range<wgt::BufferAddress>>>(),
            vec![21..42]
        );
        assert_eq!(
            tracker
                .drain(900..1000)
                .collect::<Vec<Range<wgt::BufferAddress>>>(),
            vec![900..1000]
        );
        // Split ranges: ...and a drain spanning already-initialized holes
        // yields only the still-uninitialized pieces.
        assert_eq!(
            tracker
                .drain(5..1003)
                .collect::<Vec<Range<wgt::BufferAddress>>>(),
            vec![5..21, 42..900, 1000..1003]
        );
        assert_eq!(
            tracker
                .drain(0..1337)
                .collect::<Vec<Range<wgt::BufferAddress>>>(),
            vec![0..5, 1003..1337]
        );
    }
}

View File

@@ -6,6 +6,7 @@ use crate::{
device::{alloc::MemoryBlock, DeviceError, HostMap},
hub::Resource,
id::{DeviceId, SwapChainId, TextureId},
memory_init_tracker::MemoryInitTracker,
track::{TextureSelector, DUMMY_SELECTOR},
validation::MissingBufferUsageError,
Label, LifeGuard, RefCount, Stored,
@@ -160,7 +161,7 @@ pub struct Buffer<B: hal::Backend> {
pub(crate) device_id: Stored<DeviceId>,
pub(crate) usage: wgt::BufferUsage,
pub(crate) size: wgt::BufferAddress,
pub(crate) full_range: (),
pub(crate) initialization_status: MemoryInitTracker,
pub(crate) sync_mapped_writes: Option<hal::memory::Segment>,
pub(crate) life_guard: LifeGuard,
pub(crate) map_state: BufferMapState<B>,