Queue_submit zeros out uninitialized buffer regions now

This reworks the earlier approach, now with a fine-grained scheme.
This commit is contained in:
Andreas Reich
2021-01-24 16:04:07 +01:00
parent 9595b39bb3
commit 1efdc2b229
7 changed files with 168 additions and 92 deletions

View File

@@ -49,7 +49,7 @@ use crate::{
},
hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token},
id,
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
resource::BufferUse,
span,
track::{TrackerSet, UsageConflict},
@@ -269,7 +269,8 @@ impl RenderBundleEncoder {
};
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end),
range: offset..end,
kind: MemoryInitKind::NeedsInitializedMemory,
});
state.index.set_format(index_format);
state.index.set_buffer(buffer_id, offset..end);
@@ -295,7 +296,8 @@ impl RenderBundleEncoder {
};
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end),
range: offset..end,
kind: MemoryInitKind::NeedsInitializedMemory,
});
state.vertex[slot as usize].set_buffer(buffer_id, offset..end);
}
@@ -411,7 +413,8 @@ impl RenderBundleEncoder {
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(0..buffer.size),
range: 0..buffer.size,
kind: MemoryInitKind::NeedsInitializedMemory,
});
commands.extend(state.flush_vertices());
@@ -441,7 +444,8 @@ impl RenderBundleEncoder {
let stride = 4 * 4; // 4 integers, vertexCount, instanceCount, firstVertex, firstInstance
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(0..stride),
range: 0..stride,
kind: MemoryInitKind::NeedsInitializedMemory,
});
commands.extend(state.index.flush());

View File

@@ -11,7 +11,7 @@ use crate::{
},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id,
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
resource::{Buffer, BufferUse, Texture},
span,
track::{TrackerSet, UsageConflict},
@@ -523,9 +523,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.used_buffer_ranges
.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
offset..(offset + stride),
),
range: offset..(offset + stride),
kind: MemoryInitKind::NeedsInitializedMemory,
});
state

View File

@@ -17,7 +17,7 @@ use crate::{
},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id,
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
pipeline::PipelineFlags,
resource::{BufferUse, Texture, TextureUse, TextureView, TextureViewInner},
span,
@@ -1300,7 +1300,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end),
range: offset..end,
kind: MemoryInitKind::NeedsInitializedMemory,
});
let range = hal::buffer::SubRange {
@@ -1348,9 +1349,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
offset..(offset + vertex_state.total_size),
),
range: offset..(offset + vertex_state.total_size),
kind: MemoryInitKind::NeedsInitializedMemory,
});
let range = hal::buffer::SubRange {
@@ -1607,9 +1607,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
offset..end_offset,
),
range: offset..end_offset,
kind: MemoryInitKind::NeedsInitializedMemory,
});
match indexed {
@@ -1709,15 +1708,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
offset..end_offset,
),
range: offset..end_offset,
kind: MemoryInitKind::NeedsInitializedMemory,
});
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: count_buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
count_buffer_offset..end_count_offset,
),
range: count_buffer_offset..end_count_offset,
kind: MemoryInitKind::NeedsInitializedMemory,
});
match indexed {

View File

@@ -10,7 +10,7 @@ use crate::{
device::{all_buffer_stages, all_image_stages},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
id::{BufferId, CommandEncoderId, TextureId},
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
resource::{BufferUse, Texture, TextureErrorDimension, TextureUse},
span,
track::TextureSelector,
@@ -408,17 +408,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.used_buffer_ranges
.push(ResourceMemoryInitTrackerAction {
id: destination,
action: MemoryInitTrackerAction::ImplicitlyInitialized(
destination_offset..(destination_offset + size),
),
range: destination_offset..(destination_offset + size),
kind: MemoryInitKind::ImplicitlyInitialized,
});
cmd_buf
.used_buffer_ranges
.push(ResourceMemoryInitTrackerAction {
id: source,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
source_offset..(source_offset + size),
),
range: source_offset..(source_offset + size),
kind: MemoryInitKind::NeedsInitializedMemory,
});
let region = hal::command::BufferCopy {
@@ -538,9 +536,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.used_buffer_ranges
.push(ResourceMemoryInitTrackerAction {
id: source.buffer,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
),
range: source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
kind: MemoryInitKind::NeedsInitializedMemory,
});
// TODO: Mark dest texture memory as implicitly initialized here.
@@ -695,10 +692,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.used_buffer_ranges
.push(ResourceMemoryInitTrackerAction {
id: destination.buffer,
action: MemoryInitTrackerAction::ImplicitlyInitialized(
destination.layout.offset
..(destination.layout.offset + required_buffer_bytes_in_copy),
),
range: destination.layout.offset
..(destination.layout.offset + required_buffer_bytes_in_copy),
kind: MemoryInitKind::ImplicitlyInitialized,
});
// TODO: Mark dest texture memory as required to be initialized here.

View File

@@ -209,27 +209,22 @@ fn map_buffer<B: hal::Backend>(
// If this is a write mapping, zeroing out the memory here is the only reasonable way, as all data is pushed to the GPU anyway.
let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyway.
for uninitialized_segment in
buffer
.initialization_status
.drain_uninitialized_segments(hal::memory::Segment {
offset,
size: Some(size),
})
if let Some(uninitialized_ranges) = buffer
.initialization_status
.drain_uninitialized_ranges(offset..(size + offset))
{
unsafe {
ptr::write_bytes(
ptr.as_ptr().offset(uninitialized_segment.offset as isize),
0,
uninitialized_segment.size.unwrap_or(size) as usize,
)
};
if zero_init_needs_flush_now {
block.flush_range(
raw,
uninitialized_segment.offset,
uninitialized_segment.size,
)?;
for uninitialized_range in uninitialized_ranges {
let num_bytes = uninitialized_range.end - uninitialized_range.start;
unsafe {
ptr::write_bytes(
ptr.as_ptr().offset(uninitialized_range.start as isize),
0,
num_bytes as usize,
)
};
if zero_init_needs_flush_now {
block.flush_range(raw, uninitialized_range.start, Some(num_bytes))?;
}
}
}
@@ -507,6 +502,11 @@ impl<B: GfxBackend> Device<B> {
// we are going to be copying into it, internally
usage |= hal::buffer::Usage::TRANSFER_DST;
}
} else {
// We are required to zero out (initialize) all memory.
// This can be done via mapping, transfer or with a shader. However, right now we only implement it via transfer!
// TODO: Investigate whether we could also do init via mapping or a shader, so that we need to alter the requested usage in fewer cases.
usage |= hal::buffer::Usage::TRANSFER_DST;
}
if desc.usage.is_empty() {
@@ -1369,9 +1369,8 @@ impl<B: GfxBackend> Device<B> {
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
id: bb.buffer_id,
action: MemoryInitTrackerAction::NeedsInitializedMemory(
bb.offset..(bb.offset + bind_size),
),
range: bb.offset..(bb.offset + bind_size),
kind: MemoryInitKind::NeedsInitializedMemory,
});
let sub_range = hal::buffer::SubRange {

View File

@@ -13,13 +13,14 @@ use crate::{
device::{alloc, DeviceError, WaitIdleError},
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token},
id,
memory_init_tracker::MemoryInitKind,
resource::{BufferAccessError, BufferMapState, BufferUse, TextureUse},
span, FastHashSet,
span, FastHashMap, FastHashSet,
};
use hal::{command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _};
use smallvec::SmallVec;
use std::{iter, ptr};
use std::{iter, ops::Range, ptr};
use thiserror::Error;
struct StagingData<B: hal::Backend> {
@@ -274,13 +275,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
// Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding.
{
let dst = buffer_guard.get_mut(buffer_id).unwrap();
for _ in dst
if let Some(uninitialized_ranges) = dst
.initialization_status
.drain_uninitialized_segments(hal::memory::Segment {
offset: buffer_offset,
size: Some(buffer_offset + data_size),
})
{}
.drain_uninitialized_ranges(buffer_offset..(buffer_offset + data_size))
{
uninitialized_ranges.for_each(drop);
}
}
Ok(())
@@ -478,9 +478,96 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
span!(_guard, INFO, "Queue::submit");
let hub = B::hub(self);
let mut token = Token::root();
// Insert zero initializations if necessary.
// Since we push those to the pending_writes buffer, this needs to happen before any other processing
if !command_buffer_ids.is_empty() {
// Note that in the vast majority of cases this remains empty!
let required_buffer_inits = {
let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token);
let (mut buffer_guard, _) = hub.buffers.write(&mut token);
let mut required_buffer_inits: FastHashMap<
id::BufferId,
Vec<Range<wgt::BufferAddress>>,
> = FastHashMap::default();
for &cmb_id in command_buffer_ids {
let cmdbuf = command_buffer_guard
.get(cmb_id)
.map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?;
for buffer_use in cmdbuf.used_buffer_ranges.iter() {
let buffer = buffer_guard
.get_mut(buffer_use.id)
.map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?;
if let Some(uninitialized_ranges) = buffer
.initialization_status
.drain_uninitialized_ranges(buffer_use.range.clone())
{
match buffer_use.kind {
MemoryInitKind::ImplicitlyInitialized => {
uninitialized_ranges.for_each(drop);
}
MemoryInitKind::NeedsInitializedMemory => {
required_buffer_inits
.entry(buffer_use.id)
.or_default()
.extend(uninitialized_ranges);
}
}
}
}
}
required_buffer_inits
};
if !required_buffer_inits.is_empty() {
let (mut device_guard, mut token) = hub.devices.write(&mut token);
let (buffer_guard, _) = hub.buffers.read(&mut token);
let device = device_guard
.get_mut(queue_id)
.map_err(|_| DeviceError::Invalid)?;
device
.pending_writes
.dst_buffers
.extend(required_buffer_inits.keys());
let pending_writes_cmd_buf = device.borrow_pending_writes();
for (buffer_id, ranges) in required_buffer_inits {
let buffer_raw = &buffer_guard
.get(buffer_id)
.map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_id))?
.raw
.as_ref()
.ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))?
.0;
// TODO: Currently, we implement zero init only via fill_buffer. Should we also implement it via map & shader/storage?
// Note that this requires TRANSFER usage on the buffer - see also device.create_buffer
// TODO: Collapse ranges
for range in ranges {
// TODO SHIPSTOPPER: Insert necessary barriers!!
unsafe {
pending_writes_cmd_buf.fill_buffer(
buffer_raw,
hal::buffer::SubRange {
offset: range.start,
size: Some(range.end - range.start),
},
0,
);
}
}
}
}
}
let callbacks = {
let mut token = Token::root();
let (mut device_guard, mut token) = hub.devices.write(&mut token);
let device = device_guard
.get_mut(queue_id)
@@ -601,9 +688,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}
// TODO
// go through memory_init_tracker of every buffer and gather which areas need to be cleared.
// execute resource transitions
let mut transit = device.cmd_allocator.extend(cmdbuf);
unsafe {

View File

@@ -1,18 +1,18 @@
use hal::memory::Segment;
use std::ops::Range;
#[derive(Debug, Clone)]
pub(crate) enum MemoryInitTrackerAction {
pub(crate) enum MemoryInitKind {
// The memory range is going to be written by an already initialized source, thus doesn't need extra attention.
ImplicitlyInitialized(Range<wgt::BufferAddress>),
ImplicitlyInitialized,
// The memory range is going to be read, therefore needs to ensure prior initialization.
NeedsInitializedMemory(Range<wgt::BufferAddress>),
NeedsInitializedMemory,
}
#[derive(Debug, Clone)]
pub(crate) struct ResourceMemoryInitTrackerAction<ResourceId> {
pub(crate) id: ResourceId,
pub(crate) action: MemoryInitTrackerAction,
pub(crate) range: Range<wgt::BufferAddress>,
pub(crate) kind: MemoryInitKind,
}
/// Tracks initialization status of a linear range from 0..size
@@ -34,15 +34,11 @@ impl MemoryInitTracker {
}
}
pub(crate) fn drain_uninitialized_segments<'a>(
#[must_use]
pub(crate) fn drain_uninitialized_ranges<'a>(
&'a mut self,
segment: Segment,
) -> impl Iterator<Item = Segment> + 'a {
let range = match segment.size {
Some(size) => segment.offset..(segment.offset + size),
None => segment.offset..self.uninitialized_ranges.initial_range().end,
};
range: Range<wgt::BufferAddress>,
) -> Option<impl Iterator<Item = Range<wgt::BufferAddress>> + 'a> {
let mut uninitialized_ranges: Vec<Range<wgt::BufferAddress>> = self
.uninitialized_ranges
.allocated_ranges()
@@ -58,22 +54,23 @@ impl MemoryInitTracker {
})
.collect();
std::iter::from_fn(move || {
if uninitialized_ranges.is_empty() {
return None;
}
Some(std::iter::from_fn(move || {
let range: Option<Range<wgt::BufferAddress>> =
uninitialized_ranges.last().map(|r| r.clone());
match range {
Some(range) => {
uninitialized_ranges.pop();
let result = Some(Segment {
offset: range.start,
size: Some(range.end - range.start),
});
let result = range.clone();
self.uninitialized_ranges.free_range(range);
result
Some(result)
}
None => None,
}
})
}))
}
}