mirror of
https://github.com/gfx-rs/wgpu.git
synced 2026-04-22 03:02:01 -04:00
Queue_submit zeros out uninitialized buffer regions now
again but with fine grained scheme now
This commit is contained in:
@@ -49,7 +49,7 @@ use crate::{
|
||||
},
|
||||
hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token},
|
||||
id,
|
||||
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
|
||||
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
|
||||
resource::BufferUse,
|
||||
span,
|
||||
track::{TrackerSet, UsageConflict},
|
||||
@@ -269,7 +269,8 @@ impl RenderBundleEncoder {
|
||||
};
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end),
|
||||
range: offset..end,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
state.index.set_format(index_format);
|
||||
state.index.set_buffer(buffer_id, offset..end);
|
||||
@@ -295,7 +296,8 @@ impl RenderBundleEncoder {
|
||||
};
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end),
|
||||
range: offset..end,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
state.vertex[slot as usize].set_buffer(buffer_id, offset..end);
|
||||
}
|
||||
@@ -411,7 +413,8 @@ impl RenderBundleEncoder {
|
||||
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(0..buffer.size),
|
||||
range: 0..buffer.size,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
commands.extend(state.flush_vertices());
|
||||
@@ -441,7 +444,8 @@ impl RenderBundleEncoder {
|
||||
let stride = 4 * 4; // 4 integers, vertexCount, instanceCount, firstVertex, firstInstance
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(0..stride),
|
||||
range: 0..stride,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
commands.extend(state.index.flush());
|
||||
|
||||
@@ -11,7 +11,7 @@ use crate::{
|
||||
},
|
||||
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
|
||||
id,
|
||||
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
|
||||
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
|
||||
resource::{Buffer, BufferUse, Texture},
|
||||
span,
|
||||
track::{TrackerSet, UsageConflict},
|
||||
@@ -523,9 +523,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
.used_buffer_ranges
|
||||
.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
offset..(offset + stride),
|
||||
),
|
||||
range: offset..(offset + stride),
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
state
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::{
|
||||
},
|
||||
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
|
||||
id,
|
||||
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
|
||||
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
|
||||
pipeline::PipelineFlags,
|
||||
resource::{BufferUse, Texture, TextureUse, TextureView, TextureViewInner},
|
||||
span,
|
||||
@@ -1300,7 +1300,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(offset..end),
|
||||
range: offset..end,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
let range = hal::buffer::SubRange {
|
||||
@@ -1348,9 +1349,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
offset..(offset + vertex_state.total_size),
|
||||
),
|
||||
range: offset..(offset + vertex_state.total_size),
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
let range = hal::buffer::SubRange {
|
||||
@@ -1607,9 +1607,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
offset..end_offset,
|
||||
),
|
||||
range: offset..end_offset,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
match indexed {
|
||||
@@ -1709,15 +1708,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
offset..end_offset,
|
||||
),
|
||||
range: offset..end_offset,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: count_buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
count_buffer_offset..end_count_offset,
|
||||
),
|
||||
range: count_buffer_offset..end_count_offset,
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
match indexed {
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::{
|
||||
device::{all_buffer_stages, all_image_stages},
|
||||
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
|
||||
id::{BufferId, CommandEncoderId, TextureId},
|
||||
memory_init_tracker::{MemoryInitTrackerAction, ResourceMemoryInitTrackerAction},
|
||||
memory_init_tracker::{MemoryInitKind, ResourceMemoryInitTrackerAction},
|
||||
resource::{BufferUse, Texture, TextureErrorDimension, TextureUse},
|
||||
span,
|
||||
track::TextureSelector,
|
||||
@@ -408,17 +408,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
.used_buffer_ranges
|
||||
.push(ResourceMemoryInitTrackerAction {
|
||||
id: destination,
|
||||
action: MemoryInitTrackerAction::ImplicitlyInitialized(
|
||||
destination_offset..(destination_offset + size),
|
||||
),
|
||||
range: destination_offset..(destination_offset + size),
|
||||
kind: MemoryInitKind::ImplicitlyInitialized,
|
||||
});
|
||||
cmd_buf
|
||||
.used_buffer_ranges
|
||||
.push(ResourceMemoryInitTrackerAction {
|
||||
id: source,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
source_offset..(source_offset + size),
|
||||
),
|
||||
range: source_offset..(source_offset + size),
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
let region = hal::command::BufferCopy {
|
||||
@@ -538,9 +536,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
.used_buffer_ranges
|
||||
.push(ResourceMemoryInitTrackerAction {
|
||||
id: source.buffer,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
|
||||
),
|
||||
range: source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
// TODO: Mark dest texture memory as implicitly initialized here.
|
||||
|
||||
@@ -695,10 +692,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
.used_buffer_ranges
|
||||
.push(ResourceMemoryInitTrackerAction {
|
||||
id: destination.buffer,
|
||||
action: MemoryInitTrackerAction::ImplicitlyInitialized(
|
||||
destination.layout.offset
|
||||
..(destination.layout.offset + required_buffer_bytes_in_copy),
|
||||
),
|
||||
range: destination.layout.offset
|
||||
..(destination.layout.offset + required_buffer_bytes_in_copy),
|
||||
kind: MemoryInitKind::ImplicitlyInitialized,
|
||||
});
|
||||
// TODO: Mark dest texture memory as required to be initialized here.
|
||||
|
||||
|
||||
@@ -209,27 +209,22 @@ fn map_buffer<B: hal::Backend>(
|
||||
// If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways.
|
||||
let zero_init_needs_flush_now = !block.is_coherent() && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways.
|
||||
|
||||
for uninitialized_segment in
|
||||
buffer
|
||||
.initialization_status
|
||||
.drain_uninitialized_segments(hal::memory::Segment {
|
||||
offset,
|
||||
size: Some(size),
|
||||
})
|
||||
if let Some(uninitialized_ranges) = buffer
|
||||
.initialization_status
|
||||
.drain_uninitialized_ranges(offset..(size + offset))
|
||||
{
|
||||
unsafe {
|
||||
ptr::write_bytes(
|
||||
ptr.as_ptr().offset(uninitialized_segment.offset as isize),
|
||||
0,
|
||||
uninitialized_segment.size.unwrap_or(size) as usize,
|
||||
)
|
||||
};
|
||||
if zero_init_needs_flush_now {
|
||||
block.flush_range(
|
||||
raw,
|
||||
uninitialized_segment.offset,
|
||||
uninitialized_segment.size,
|
||||
)?;
|
||||
for uninitialized_range in uninitialized_ranges {
|
||||
let num_bytes = uninitialized_range.end - uninitialized_range.start;
|
||||
unsafe {
|
||||
ptr::write_bytes(
|
||||
ptr.as_ptr().offset(uninitialized_range.start as isize),
|
||||
0,
|
||||
num_bytes as usize,
|
||||
)
|
||||
};
|
||||
if zero_init_needs_flush_now {
|
||||
block.flush_range(raw, uninitialized_range.start, Some(num_bytes))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,6 +502,11 @@ impl<B: GfxBackend> Device<B> {
|
||||
// we are going to be copying into it, internally
|
||||
usage |= hal::buffer::Usage::TRANSFER_DST;
|
||||
}
|
||||
} else {
|
||||
// We are required to zero out (initialize) all memory.
|
||||
// This can be done via mapping, transfer or with a shader. However, right now we only implement it via transfer!
|
||||
// TODO: Investigate if we could also do init via mapping or shader so that we need to alter the requested usage in less cases.
|
||||
usage |= hal::buffer::Usage::TRANSFER_DST;
|
||||
}
|
||||
|
||||
if desc.usage.is_empty() {
|
||||
@@ -1369,9 +1369,8 @@ impl<B: GfxBackend> Device<B> {
|
||||
|
||||
used_buffer_ranges.push(ResourceMemoryInitTrackerAction {
|
||||
id: bb.buffer_id,
|
||||
action: MemoryInitTrackerAction::NeedsInitializedMemory(
|
||||
bb.offset..(bb.offset + bind_size),
|
||||
),
|
||||
range: bb.offset..(bb.offset + bind_size),
|
||||
kind: MemoryInitKind::NeedsInitializedMemory,
|
||||
});
|
||||
|
||||
let sub_range = hal::buffer::SubRange {
|
||||
|
||||
@@ -13,13 +13,14 @@ use crate::{
|
||||
device::{alloc, DeviceError, WaitIdleError},
|
||||
hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token},
|
||||
id,
|
||||
memory_init_tracker::MemoryInitKind,
|
||||
resource::{BufferAccessError, BufferMapState, BufferUse, TextureUse},
|
||||
span, FastHashSet,
|
||||
span, FastHashMap, FastHashSet,
|
||||
};
|
||||
|
||||
use hal::{command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _};
|
||||
use smallvec::SmallVec;
|
||||
use std::{iter, ptr};
|
||||
use std::{iter, ops::Range, ptr};
|
||||
use thiserror::Error;
|
||||
|
||||
struct StagingData<B: hal::Backend> {
|
||||
@@ -274,13 +275,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
// Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding.
|
||||
{
|
||||
let dst = buffer_guard.get_mut(buffer_id).unwrap();
|
||||
for _ in dst
|
||||
if let Some(uninitialized_ranges) = dst
|
||||
.initialization_status
|
||||
.drain_uninitialized_segments(hal::memory::Segment {
|
||||
offset: buffer_offset,
|
||||
size: Some(buffer_offset + data_size),
|
||||
})
|
||||
{}
|
||||
.drain_uninitialized_ranges(buffer_offset..(buffer_offset + data_size))
|
||||
{
|
||||
uninitialized_ranges.for_each(drop);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -478,9 +478,96 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
span!(_guard, INFO, "Queue::submit");
|
||||
|
||||
let hub = B::hub(self);
|
||||
let mut token = Token::root();
|
||||
|
||||
// Insert zero initializations if necessary.
|
||||
// Since we push those to the pending_writes buffer, this needs to happen before any other processing
|
||||
if !command_buffer_ids.is_empty() {
|
||||
// Note that in the vast majority of cases this remains empty!
|
||||
let required_buffer_inits = {
|
||||
let (command_buffer_guard, mut token) = hub.command_buffers.read(&mut token);
|
||||
let (mut buffer_guard, _) = hub.buffers.write(&mut token);
|
||||
|
||||
let mut required_buffer_inits: FastHashMap<
|
||||
id::BufferId,
|
||||
Vec<Range<wgt::BufferAddress>>,
|
||||
> = FastHashMap::default();
|
||||
|
||||
for &cmb_id in command_buffer_ids {
|
||||
let cmdbuf = command_buffer_guard
|
||||
.get(cmb_id)
|
||||
.map_err(|_| QueueSubmitError::InvalidCommandBuffer(cmb_id))?;
|
||||
|
||||
for buffer_use in cmdbuf.used_buffer_ranges.iter() {
|
||||
let buffer = buffer_guard
|
||||
.get_mut(buffer_use.id)
|
||||
.map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?;
|
||||
|
||||
if let Some(uninitialized_ranges) = buffer
|
||||
.initialization_status
|
||||
.drain_uninitialized_ranges(buffer_use.range.clone())
|
||||
{
|
||||
match buffer_use.kind {
|
||||
MemoryInitKind::ImplicitlyInitialized => {
|
||||
uninitialized_ranges.for_each(drop);
|
||||
}
|
||||
MemoryInitKind::NeedsInitializedMemory => {
|
||||
required_buffer_inits
|
||||
.entry(buffer_use.id)
|
||||
.or_default()
|
||||
.extend(uninitialized_ranges);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
required_buffer_inits
|
||||
};
|
||||
|
||||
if !required_buffer_inits.is_empty() {
|
||||
let (mut device_guard, mut token) = hub.devices.write(&mut token);
|
||||
let (buffer_guard, _) = hub.buffers.read(&mut token);
|
||||
let device = device_guard
|
||||
.get_mut(queue_id)
|
||||
.map_err(|_| DeviceError::Invalid)?;
|
||||
|
||||
device
|
||||
.pending_writes
|
||||
.dst_buffers
|
||||
.extend(required_buffer_inits.keys());
|
||||
let pending_writes_cmd_buf = device.borrow_pending_writes();
|
||||
|
||||
for (buffer_id, ranges) in required_buffer_inits {
|
||||
let buffer_raw = &buffer_guard
|
||||
.get(buffer_id)
|
||||
.map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_id))?
|
||||
.raw
|
||||
.as_ref()
|
||||
.ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))?
|
||||
.0;
|
||||
|
||||
// TODO: Currently, we implement zero init only via fill_buffer. Should we implement this also via map & shader/storage?
|
||||
// Note that this requires TRANSFER usage on the buffer - see also device.create_buffer
|
||||
|
||||
// TODO: Collapse ranges
|
||||
for range in ranges {
|
||||
// TODO SHIPSTOPPER: Insert necessary barriers!!
|
||||
unsafe {
|
||||
pending_writes_cmd_buf.fill_buffer(
|
||||
buffer_raw,
|
||||
hal::buffer::SubRange {
|
||||
offset: range.start,
|
||||
size: Some(range.end - range.start),
|
||||
},
|
||||
0,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let callbacks = {
|
||||
let mut token = Token::root();
|
||||
let (mut device_guard, mut token) = hub.devices.write(&mut token);
|
||||
let device = device_guard
|
||||
.get_mut(queue_id)
|
||||
@@ -601,9 +688,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO
|
||||
// go through memory_init_tracker of every buffer and gather which areas need to be cleared.
|
||||
|
||||
// execute resource transitions
|
||||
let mut transit = device.cmd_allocator.extend(cmdbuf);
|
||||
unsafe {
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
use hal::memory::Segment;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) enum MemoryInitTrackerAction {
|
||||
pub(crate) enum MemoryInitKind {
|
||||
// The memory range is going to be written by an already initialized source, thus doesn't need extra attention.
|
||||
ImplicitlyInitialized(Range<wgt::BufferAddress>),
|
||||
ImplicitlyInitialized,
|
||||
// The memory range is going to be read, therefore needs to ensure prior initialization.
|
||||
NeedsInitializedMemory(Range<wgt::BufferAddress>),
|
||||
NeedsInitializedMemory,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct ResourceMemoryInitTrackerAction<ResourceId> {
|
||||
pub(crate) id: ResourceId,
|
||||
pub(crate) action: MemoryInitTrackerAction,
|
||||
pub(crate) range: Range<wgt::BufferAddress>,
|
||||
pub(crate) kind: MemoryInitKind,
|
||||
}
|
||||
|
||||
/// Tracks initialization status of a linear range from 0..size
|
||||
@@ -34,15 +34,11 @@ impl MemoryInitTracker {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn drain_uninitialized_segments<'a>(
|
||||
#[must_use]
|
||||
pub(crate) fn drain_uninitialized_ranges<'a>(
|
||||
&'a mut self,
|
||||
segment: Segment,
|
||||
) -> impl Iterator<Item = Segment> + 'a {
|
||||
let range = match segment.size {
|
||||
Some(size) => segment.offset..(segment.offset + size),
|
||||
None => segment.offset..self.uninitialized_ranges.initial_range().end,
|
||||
};
|
||||
|
||||
range: Range<wgt::BufferAddress>,
|
||||
) -> Option<impl Iterator<Item = Range<wgt::BufferAddress>> + 'a> {
|
||||
let mut uninitialized_ranges: Vec<Range<wgt::BufferAddress>> = self
|
||||
.uninitialized_ranges
|
||||
.allocated_ranges()
|
||||
@@ -58,22 +54,23 @@ impl MemoryInitTracker {
|
||||
})
|
||||
.collect();
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
if uninitialized_ranges.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(std::iter::from_fn(move || {
|
||||
let range: Option<Range<wgt::BufferAddress>> =
|
||||
uninitialized_ranges.last().map(|r| r.clone());
|
||||
match range {
|
||||
Some(range) => {
|
||||
uninitialized_ranges.pop();
|
||||
let result = Some(Segment {
|
||||
offset: range.start,
|
||||
size: Some(range.end - range.start),
|
||||
});
|
||||
let result = range.clone();
|
||||
self.uninitialized_ranges.free_range(range);
|
||||
result
|
||||
Some(result)
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
})
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user