mirror of
https://github.com/gfx-rs/wgpu.git
synced 2026-04-22 03:02:01 -04:00
Merge #679
679: Enforce copy buffer-texture alignment r=cwfitzgerald a=kvark **Connections** This is a follow-up to #666 **Description** We are now enforcing the `bytes_per_row` on copy-texture copies to `COPY_BYTES_PER_ROW_ALIGNMENT`. We allow it being non-aligned for `write_texture`, which now has the code to properly align the staging space it uses, and to copy the rows one by one with proper alignment. We are also moving `BufferSize` to wgpu-types, because it's required for wgpu-rs to build. **Testing** Testing this needs https://github.com/gfx-rs/wgpu-rs/pull/328, which is blocked by https://github.com/gfx-rs/wgpu-rs/pull/323 Co-authored-by: Dzmitry Malyshau <kvarkus@gmail.com>
This commit is contained in:
@@ -307,7 +307,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
let hub = B::hub(self);
|
||||
let mut token = Token::root();
|
||||
|
||||
let (adapter_guard, mut token) = hub.adapters.read(&mut token);
|
||||
let (device_guard, mut token) = hub.devices.read(&mut token);
|
||||
let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token);
|
||||
|
||||
@@ -371,13 +370,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
};
|
||||
|
||||
let (context, sample_count) = {
|
||||
use hal::{adapter::PhysicalDevice as _, device::Device as _};
|
||||
use hal::device::Device as _;
|
||||
|
||||
let limits = adapter_guard[device.adapter_id.value]
|
||||
.raw
|
||||
.physical_device
|
||||
.limits();
|
||||
let samples_count_limit = limits.framebuffer_color_sample_counts;
|
||||
let samples_count_limit = device.hal_limits.framebuffer_color_sample_counts;
|
||||
let base_trackers = &cmb.trackers;
|
||||
|
||||
let mut extent = None;
|
||||
|
||||
@@ -199,14 +199,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
.surface_desc()
|
||||
.bits as u32
|
||||
/ BITS_PER_BYTE;
|
||||
let buffer_width = source.layout.bytes_per_row / bytes_per_texel;
|
||||
assert_eq!(wgt::COPY_BYTES_PER_ROW_ALIGNMENT % bytes_per_texel, 0);
|
||||
assert_eq!(
|
||||
source.layout.bytes_per_row % bytes_per_texel,
|
||||
source.layout.bytes_per_row % wgt::COPY_BYTES_PER_ROW_ALIGNMENT,
|
||||
0,
|
||||
"Source bytes per row ({}) must be a multiple of bytes per texel ({})",
|
||||
"Source bytes per row ({}) must be a multiple of {}",
|
||||
source.layout.bytes_per_row,
|
||||
bytes_per_texel
|
||||
wgt::COPY_BYTES_PER_ROW_ALIGNMENT
|
||||
);
|
||||
let buffer_width = source.layout.bytes_per_row / bytes_per_texel;
|
||||
let region = hal::command::BufferImageCopy {
|
||||
buffer_offset: source.layout.offset,
|
||||
buffer_width,
|
||||
@@ -286,14 +287,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
.surface_desc()
|
||||
.bits as u32
|
||||
/ BITS_PER_BYTE;
|
||||
let buffer_width = destination.layout.bytes_per_row / bytes_per_texel;
|
||||
assert_eq!(wgt::COPY_BYTES_PER_ROW_ALIGNMENT % bytes_per_texel, 0);
|
||||
assert_eq!(
|
||||
destination.layout.bytes_per_row % bytes_per_texel,
|
||||
destination.layout.bytes_per_row % wgt::COPY_BYTES_PER_ROW_ALIGNMENT,
|
||||
0,
|
||||
"Destination bytes per row ({}) must be a multiple of bytes per texel ({})",
|
||||
"Destination bytes per row ({}) must be a multiple of {}",
|
||||
destination.layout.bytes_per_row,
|
||||
bytes_per_texel
|
||||
wgt::COPY_BYTES_PER_ROW_ALIGNMENT
|
||||
);
|
||||
let buffer_width = destination.layout.bytes_per_row / bytes_per_texel;
|
||||
let region = hal::command::BufferImageCopy {
|
||||
buffer_offset: destination.layout.offset,
|
||||
buffer_width,
|
||||
|
||||
@@ -202,6 +202,7 @@ pub struct Device<B: hal::Backend> {
|
||||
// Life tracker should be locked right after the device and before anything else.
|
||||
life_tracker: Mutex<life::LifetimeTracker<B>>,
|
||||
temp_suspected: life::SuspectedResources,
|
||||
pub(crate) hal_limits: hal::Limits,
|
||||
pub(crate) private_features: PrivateFeatures,
|
||||
limits: wgt::Limits,
|
||||
extensions: wgt::Extensions,
|
||||
@@ -216,7 +217,7 @@ impl<B: GfxBackend> Device<B> {
|
||||
adapter_id: Stored<id::AdapterId>,
|
||||
queue_group: hal::queue::QueueGroup<B>,
|
||||
mem_props: hal::adapter::MemoryProperties,
|
||||
non_coherent_atom_size: u64,
|
||||
hal_limits: hal::Limits,
|
||||
supports_texture_d24_s8: bool,
|
||||
desc: &wgt::DeviceDescriptor,
|
||||
trace_path: Option<&std::path::Path>,
|
||||
@@ -237,7 +238,7 @@ impl<B: GfxBackend> Device<B> {
|
||||
gfx_memory::LinearConfig {
|
||||
linear_size: 0x100_0000,
|
||||
},
|
||||
non_coherent_atom_size,
|
||||
hal_limits.non_coherent_atom_size as u64,
|
||||
)
|
||||
};
|
||||
#[cfg(not(feature = "trace"))]
|
||||
@@ -273,6 +274,7 @@ impl<B: GfxBackend> Device<B> {
|
||||
None
|
||||
}
|
||||
}),
|
||||
hal_limits,
|
||||
private_features: PrivateFeatures {
|
||||
supports_texture_d24_s8,
|
||||
},
|
||||
|
||||
@@ -61,19 +61,16 @@ impl<B: hal::Backend> PendingWrites<B> {
|
||||
}
|
||||
|
||||
impl<B: hal::Backend> super::Device<B> {
|
||||
fn stage_data(&mut self, data: &[u8]) -> StagingData<B> {
|
||||
fn prepare_stage(&mut self, size: wgt::BufferAddress) -> StagingData<B> {
|
||||
let mut buffer = unsafe {
|
||||
self.raw
|
||||
.create_buffer(
|
||||
data.len() as wgt::BufferAddress,
|
||||
hal::buffer::Usage::TRANSFER_SRC,
|
||||
)
|
||||
.create_buffer(size, hal::buffer::Usage::TRANSFER_SRC)
|
||||
.unwrap()
|
||||
};
|
||||
//TODO: do we need to transition into HOST_WRITE access first?
|
||||
let requirements = unsafe { self.raw.get_buffer_requirements(&buffer) };
|
||||
|
||||
let mut memory = self
|
||||
let memory = self
|
||||
.mem_allocator
|
||||
.lock()
|
||||
.allocate(
|
||||
@@ -92,12 +89,6 @@ impl<B: hal::Backend> super::Device<B> {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let mut mapped = memory.map(&self.raw, hal::memory::Segment::ALL).unwrap();
|
||||
unsafe { mapped.write(&self.raw, hal::memory::Segment::ALL) }
|
||||
.unwrap()
|
||||
.slice[..data.len()]
|
||||
.copy_from_slice(data);
|
||||
|
||||
let comb = match self.pending_writes.command_buffer.take() {
|
||||
Some(comb) => comb,
|
||||
None => {
|
||||
@@ -147,7 +138,17 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
None => {}
|
||||
}
|
||||
|
||||
let mut stage = device.stage_data(data);
|
||||
let mut stage = device.prepare_stage(data.len() as wgt::BufferAddress);
|
||||
{
|
||||
let mut mapped = stage
|
||||
.memory
|
||||
.map(&device.raw, hal::memory::Segment::ALL)
|
||||
.unwrap();
|
||||
unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) }
|
||||
.unwrap()
|
||||
.slice[..data.len()]
|
||||
.copy_from_slice(data);
|
||||
}
|
||||
|
||||
let mut trackers = device.trackers.lock();
|
||||
let (dst, transition) =
|
||||
@@ -217,7 +218,45 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
None => {}
|
||||
}
|
||||
|
||||
let mut stage = device.stage_data(data);
|
||||
let texture_format = texture_guard[destination.texture].format;
|
||||
let bytes_per_texel = conv::map_texture_format(texture_format, device.private_features)
|
||||
.surface_desc()
|
||||
.bits as u32
|
||||
/ BITS_PER_BYTE;
|
||||
|
||||
let stage_bytes_per_row = get_lowest_common_denom(
|
||||
device.hal_limits.optimal_buffer_copy_pitch_alignment as u32,
|
||||
bytes_per_texel,
|
||||
);
|
||||
let stage_size = stage_bytes_per_row as u64
|
||||
* ((size.depth - 1) * data_layout.rows_per_image + size.height) as u64;
|
||||
let mut stage = device.prepare_stage(stage_size);
|
||||
{
|
||||
let mut mapped = stage
|
||||
.memory
|
||||
.map(&device.raw, hal::memory::Segment::ALL)
|
||||
.unwrap();
|
||||
let mapping = unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) }.unwrap();
|
||||
if stage_bytes_per_row == data_layout.bytes_per_row {
|
||||
// Unlikely case of the data already being aligned optimally.
|
||||
mapping.slice[..stage_size as usize].copy_from_slice(data);
|
||||
} else {
|
||||
// Copy row by row into the optimal alignment.
|
||||
let copy_bytes_per_row =
|
||||
stage_bytes_per_row.min(data_layout.bytes_per_row) as usize;
|
||||
for layer in 0..size.depth {
|
||||
let rows_offset = layer * data_layout.rows_per_image;
|
||||
for row in 0..size.height {
|
||||
let data_offset =
|
||||
(rows_offset + row) as usize * data_layout.bytes_per_row as usize;
|
||||
let stage_offset =
|
||||
(rows_offset + row) as usize * stage_bytes_per_row as usize;
|
||||
mapping.slice[stage_offset..stage_offset + copy_bytes_per_row]
|
||||
.copy_from_slice(&data[data_offset..data_offset + copy_bytes_per_row]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut trackers = device.trackers.lock();
|
||||
let (dst, transition) = trackers.textures.use_replace(
|
||||
@@ -235,21 +274,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
let last_submit_index = device.life_guard.submission_index.load(Ordering::Relaxed);
|
||||
dst.life_guard.use_at(last_submit_index + 1);
|
||||
|
||||
let bytes_per_texel = conv::map_texture_format(dst.format, device.private_features)
|
||||
.surface_desc()
|
||||
.bits as u32
|
||||
/ BITS_PER_BYTE;
|
||||
let buffer_width = data_layout.bytes_per_row / bytes_per_texel;
|
||||
assert_eq!(
|
||||
data_layout.bytes_per_row % bytes_per_texel,
|
||||
0,
|
||||
"Source bytes per row ({}) must be a multiple of bytes per texel ({})",
|
||||
data_layout.bytes_per_row,
|
||||
bytes_per_texel
|
||||
);
|
||||
let region = hal::command::BufferImageCopy {
|
||||
buffer_offset: 0,
|
||||
buffer_width,
|
||||
buffer_width: stage_bytes_per_row / bytes_per_texel,
|
||||
buffer_height: data_layout.rows_per_image,
|
||||
image_layers,
|
||||
image_offset,
|
||||
@@ -467,3 +494,40 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
super::fire_map_callbacks(callbacks);
|
||||
}
|
||||
}
|
||||
|
||||
fn get_lowest_common_denom(a: u32, b: u32) -> u32 {
|
||||
let gcd = if a >= b {
|
||||
get_greatest_common_divisor(a, b)
|
||||
} else {
|
||||
get_greatest_common_divisor(b, a)
|
||||
};
|
||||
a * b / gcd
|
||||
}
|
||||
|
||||
fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
|
||||
assert!(a >= b);
|
||||
loop {
|
||||
let c = a % b;
|
||||
if c == 0 {
|
||||
return b;
|
||||
} else {
|
||||
a = b;
|
||||
b = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lcd() {
|
||||
assert_eq!(get_lowest_common_denom(2, 2), 2);
|
||||
assert_eq!(get_lowest_common_denom(2, 3), 6);
|
||||
assert_eq!(get_lowest_common_denom(6, 4), 12);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_gcd() {
|
||||
assert_eq!(get_greatest_common_divisor(5, 1), 1);
|
||||
assert_eq!(get_greatest_common_divisor(4, 2), 2);
|
||||
assert_eq!(get_greatest_common_divisor(6, 4), 2);
|
||||
assert_eq!(get_greatest_common_divisor(7, 7), 7);
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use hal::{
|
||||
self,
|
||||
adapter::{AdapterInfo as HalAdapterInfo, DeviceType as HalDeviceType, PhysicalDevice as _},
|
||||
queue::QueueFamily as _,
|
||||
window::Surface as _,
|
||||
@@ -652,7 +651,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
},
|
||||
gpu.queue_groups.swap_remove(0),
|
||||
mem_props,
|
||||
limits.non_coherent_atom_size as u64,
|
||||
limits,
|
||||
supports_texture_d24_s8,
|
||||
desc,
|
||||
trace_path,
|
||||
|
||||
@@ -101,6 +101,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
let (device_guard, mut token) = hub.devices.read(&mut token);
|
||||
let (mut swap_chain_guard, mut token) = hub.swap_chains.write(&mut token);
|
||||
let sc = &mut swap_chain_guard[swap_chain_id];
|
||||
#[cfg_attr(not(feature = "trace"), allow(unused_variables))]
|
||||
let device = &device_guard[sc.device_id.value];
|
||||
|
||||
let suf = B::get_surface_mut(surface);
|
||||
|
||||
@@ -10,6 +10,11 @@ use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::{io, ptr, slice};
|
||||
|
||||
/// Buffer-Texture copies on command encoders have to have the `bytes_per_row`
|
||||
/// aligned to this number.
|
||||
///
|
||||
/// This doesn't apply to `Queue::write_texture`.
|
||||
pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;
|
||||
/// Bound uniform/storage buffer offsets must be aligned to this number.
|
||||
pub const BIND_BUFFER_ALIGNMENT: u64 = 256;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user