Enforce copy buffer-texture row pitch alignment

Dzmitry Malyshau
2020-05-28 10:21:09 -04:00
parent 3a6cdeec94
commit 321a5cee0f
7 changed files with 114 additions and 46 deletions

View File

@@ -307,7 +307,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let hub = B::hub(self);
         let mut token = Token::root();
-        let (adapter_guard, mut token) = hub.adapters.read(&mut token);
         let (device_guard, mut token) = hub.devices.read(&mut token);
         let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token);
@@ -371,13 +370,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         };

         let (context, sample_count) = {
-            use hal::{adapter::PhysicalDevice as _, device::Device as _};
+            use hal::device::Device as _;

-            let limits = adapter_guard[device.adapter_id.value]
-                .raw
-                .physical_device
-                .limits();
-            let samples_count_limit = limits.framebuffer_color_sample_counts;
+            let samples_count_limit = device.hal_limits.framebuffer_color_sample_counts;
             let base_trackers = &cmb.trackers;

             let mut extent = None;

View File

@@ -199,14 +199,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             .surface_desc()
             .bits as u32
             / BITS_PER_BYTE;
-        let buffer_width = source.layout.bytes_per_row / bytes_per_texel;
+        assert_eq!(wgt::COPY_BYTES_PER_ROW_ALIGNMENT % bytes_per_texel, 0);
         assert_eq!(
-            source.layout.bytes_per_row % bytes_per_texel,
+            source.layout.bytes_per_row % wgt::COPY_BYTES_PER_ROW_ALIGNMENT,
             0,
-            "Source bytes per row ({}) must be a multiple of bytes per texel ({})",
+            "Source bytes per row ({}) must be a multiple of {}",
             source.layout.bytes_per_row,
-            bytes_per_texel
+            wgt::COPY_BYTES_PER_ROW_ALIGNMENT
         );
+        let buffer_width = source.layout.bytes_per_row / bytes_per_texel;
         let region = hal::command::BufferImageCopy {
             buffer_offset: source.layout.offset,
             buffer_width,
@@ -286,14 +287,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             .surface_desc()
             .bits as u32
             / BITS_PER_BYTE;
-        let buffer_width = destination.layout.bytes_per_row / bytes_per_texel;
+        assert_eq!(wgt::COPY_BYTES_PER_ROW_ALIGNMENT % bytes_per_texel, 0);
         assert_eq!(
-            destination.layout.bytes_per_row % bytes_per_texel,
+            destination.layout.bytes_per_row % wgt::COPY_BYTES_PER_ROW_ALIGNMENT,
             0,
-            "Destination bytes per row ({}) must be a multiple of bytes per texel ({})",
+            "Destination bytes per row ({}) must be a multiple of {}",
             destination.layout.bytes_per_row,
-            bytes_per_texel
+            wgt::COPY_BYTES_PER_ROW_ALIGNMENT
         );
+        let buffer_width = destination.layout.bytes_per_row / bytes_per_texel;
         let region = hal::command::BufferImageCopy {
             buffer_offset: destination.layout.offset,
             buffer_width,

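The two hunks above tighten copy validation: `bytes_per_row` must now be a multiple of `wgt::COPY_BYTES_PER_ROW_ALIGNMENT` rather than merely a multiple of the texel size. A standalone sketch of the same check (the helper name and `main` are illustrative, not part of this commit):

const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

// Mirrors the asserts added above, outside of wgpu-core.
fn validate_row_pitch(bytes_per_row: u32, bytes_per_texel: u32) -> u32 {
    // The alignment itself must be texel-aligned, so the division below is exact.
    assert_eq!(COPY_BYTES_PER_ROW_ALIGNMENT % bytes_per_texel, 0);
    // The caller-provided pitch must land on the 256-byte boundary.
    assert_eq!(bytes_per_row % COPY_BYTES_PER_ROW_ALIGNMENT, 0);
    bytes_per_row / bytes_per_texel // buffer_width, in texels
}

fn main() {
    assert_eq!(validate_row_pitch(512, 4), 128); // ok: 512 % 256 == 0
    // validate_row_pitch(400, 4); // would panic: 400 % 256 != 0
}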
View File

@@ -202,6 +202,7 @@ pub struct Device<B: hal::Backend> {
     // Life tracker should be locked right after the device and before anything else.
     life_tracker: Mutex<life::LifetimeTracker<B>>,
     temp_suspected: life::SuspectedResources,
+    pub(crate) hal_limits: hal::Limits,
     pub(crate) private_features: PrivateFeatures,
     limits: wgt::Limits,
     extensions: wgt::Extensions,
@@ -216,7 +217,7 @@ impl<B: GfxBackend> Device<B> {
         adapter_id: Stored<id::AdapterId>,
         queue_group: hal::queue::QueueGroup<B>,
         mem_props: hal::adapter::MemoryProperties,
-        non_coherent_atom_size: u64,
+        hal_limits: hal::Limits,
         supports_texture_d24_s8: bool,
         desc: &wgt::DeviceDescriptor,
         trace_path: Option<&std::path::Path>,
@@ -237,7 +238,7 @@ impl<B: GfxBackend> Device<B> {
                 gfx_memory::LinearConfig {
                     linear_size: 0x100_0000,
                 },
-                non_coherent_atom_size,
+                hal_limits.non_coherent_atom_size as u64,
             )
         };
         #[cfg(not(feature = "trace"))]
@@ -273,6 +274,7 @@ impl<B: GfxBackend> Device<B> {
                     None
                 }
            }),
+            hal_limits,
            private_features: PrivateFeatures {
                supports_texture_d24_s8,
            },

View File

@@ -61,19 +61,16 @@ impl<B: hal::Backend> PendingWrites<B> {
    }
}

impl<B: hal::Backend> super::Device<B> {
-    fn stage_data(&mut self, data: &[u8]) -> StagingData<B> {
+    fn prepare_stage(&mut self, size: wgt::BufferAddress) -> StagingData<B> {
        let mut buffer = unsafe {
            self.raw
-                .create_buffer(
-                    data.len() as wgt::BufferAddress,
-                    hal::buffer::Usage::TRANSFER_SRC,
-                )
+                .create_buffer(size, hal::buffer::Usage::TRANSFER_SRC)
                .unwrap()
        };
        //TODO: do we need to transition into HOST_WRITE access first?
        let requirements = unsafe { self.raw.get_buffer_requirements(&buffer) };
-        let mut memory = self
+        let memory = self
            .mem_allocator
            .lock()
            .allocate(
@@ -92,12 +89,6 @@ impl<B: hal::Backend> super::Device<B> {
                .unwrap();
        }

-        let mut mapped = memory.map(&self.raw, hal::memory::Segment::ALL).unwrap();
-        unsafe { mapped.write(&self.raw, hal::memory::Segment::ALL) }
-            .unwrap()
-            .slice[..data.len()]
-            .copy_from_slice(data);
-
        let comb = match self.pending_writes.command_buffer.take() {
            Some(comb) => comb,
            None => {
@@ -147,7 +138,17 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
            None => {}
        }

-        let mut stage = device.stage_data(data);
+        let mut stage = device.prepare_stage(data.len() as wgt::BufferAddress);
+        {
+            let mut mapped = stage
+                .memory
+                .map(&device.raw, hal::memory::Segment::ALL)
+                .unwrap();
+            unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) }
+                .unwrap()
+                .slice[..data.len()]
+                .copy_from_slice(data);
+        }

        let mut trackers = device.trackers.lock();
        let (dst, transition) =
@@ -217,7 +218,45 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
            None => {}
        }

-        let mut stage = device.stage_data(data);
+        let texture_format = texture_guard[destination.texture].format;
+        let bytes_per_texel = conv::map_texture_format(texture_format, device.private_features)
+            .surface_desc()
+            .bits as u32
+            / BITS_PER_BYTE;
+
+        let stage_bytes_per_row = get_lowest_common_denom(
+            device.hal_limits.optimal_buffer_copy_pitch_alignment as u32,
+            bytes_per_texel,
+        );
+        let stage_size = stage_bytes_per_row as u64
+            * ((size.depth - 1) * data_layout.rows_per_image + size.height) as u64;
+        let mut stage = device.prepare_stage(stage_size);
+        {
+            let mut mapped = stage
+                .memory
+                .map(&device.raw, hal::memory::Segment::ALL)
+                .unwrap();
+            let mapping = unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) }.unwrap();
+            if stage_bytes_per_row == data_layout.bytes_per_row {
+                // Unlikely case of the data already being aligned optimally.
+                mapping.slice[..stage_size as usize].copy_from_slice(data);
+            } else {
+                // Copy row by row into the optimal alignment.
+                let copy_bytes_per_row =
+                    stage_bytes_per_row.min(data_layout.bytes_per_row) as usize;
+                for layer in 0..size.depth {
+                    let rows_offset = layer * data_layout.rows_per_image;
+                    for row in 0..size.height {
+                        let data_offset =
+                            (rows_offset + row) as usize * data_layout.bytes_per_row as usize;
+                        let stage_offset =
+                            (rows_offset + row) as usize * stage_bytes_per_row as usize;
+                        mapping.slice[stage_offset..stage_offset + copy_bytes_per_row]
+                            .copy_from_slice(&data[data_offset..data_offset + copy_bytes_per_row]);
+                    }
+                }
+            }
+        }

        let mut trackers = device.trackers.lock();
        let (dst, transition) = trackers.textures.use_replace(
@@ -235,21 +274,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
        let last_submit_index = device.life_guard.submission_index.load(Ordering::Relaxed);
        dst.life_guard.use_at(last_submit_index + 1);

-        let bytes_per_texel = conv::map_texture_format(dst.format, device.private_features)
-            .surface_desc()
-            .bits as u32
-            / BITS_PER_BYTE;
-        let buffer_width = data_layout.bytes_per_row / bytes_per_texel;
-        assert_eq!(
-            data_layout.bytes_per_row % bytes_per_texel,
-            0,
-            "Source bytes per row ({}) must be a multiple of bytes per texel ({})",
-            data_layout.bytes_per_row,
-            bytes_per_texel
-        );
        let region = hal::command::BufferImageCopy {
            buffer_offset: 0,
-            buffer_width,
+            buffer_width: stage_bytes_per_row / bytes_per_texel,
            buffer_height: data_layout.rows_per_image,
            image_layers,
            image_offset,
@@ -467,3 +494,40 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
        super::fire_map_callbacks(callbacks);
    }
}
+
+fn get_lowest_common_denom(a: u32, b: u32) -> u32 {
+    let gcd = if a >= b {
+        get_greatest_common_divisor(a, b)
+    } else {
+        get_greatest_common_divisor(b, a)
+    };
+    a * b / gcd
+}
+
+fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
+    assert!(a >= b);
+    loop {
+        let c = a % b;
+        if c == 0 {
+            return b;
+        } else {
+            a = b;
+            b = c;
+        }
+    }
+}
+
+#[test]
+fn test_lcd() {
+    assert_eq!(get_lowest_common_denom(2, 2), 2);
+    assert_eq!(get_lowest_common_denom(2, 3), 6);
+    assert_eq!(get_lowest_common_denom(6, 4), 12);
+}
+
+#[test]
+fn test_gcd() {
+    assert_eq!(get_greatest_common_divisor(5, 1), 1);
+    assert_eq!(get_greatest_common_divisor(4, 2), 2);
+    assert_eq!(get_greatest_common_divisor(6, 4), 2);
+    assert_eq!(get_greatest_common_divisor(7, 7), 7);
+}

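The staging pitch above is the least common multiple of the device's `optimal_buffer_copy_pitch_alignment` and the texel size (`get_lowest_common_denom`, despite its name, computes an LCM). A worked sketch of that arithmetic, assuming `get_lowest_common_denom` from the hunk above is in scope; the 4-byte pitch alignment and 6-byte texel are hypothetical numbers for illustration:

fn main() {
    // Smallest pitch divisible by both the device alignment and the texel size.
    let stage_bytes_per_row = get_lowest_common_denom(4, 6);
    assert_eq!(stage_bytes_per_row, 12);

    // Staging size for an 8-row, 2-layer copy with rows_per_image == 8,
    // following the stage_size formula above: 12 * ((2 - 1) * 8 + 8) == 192.
    let (height, depth, rows_per_image) = (8u32, 2u32, 8u32);
    let stage_size =
        stage_bytes_per_row as u64 * ((depth - 1) * rows_per_image + height) as u64;
    assert_eq!(stage_size, 192);
}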
View File

@@ -18,7 +18,6 @@ use serde::Deserialize;
 use serde::Serialize;

 use hal::{
-    self,
     adapter::{AdapterInfo as HalAdapterInfo, DeviceType as HalDeviceType, PhysicalDevice as _},
     queue::QueueFamily as _,
     window::Surface as _,
@@ -652,7 +651,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
            },
            gpu.queue_groups.swap_remove(0),
            mem_props,
-            limits.non_coherent_atom_size as u64,
+            limits,
            supports_texture_d24_s8,
            desc,
            trace_path,

View File

@@ -101,6 +101,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
        let (device_guard, mut token) = hub.devices.read(&mut token);
        let (mut swap_chain_guard, mut token) = hub.swap_chains.write(&mut token);
        let sc = &mut swap_chain_guard[swap_chain_id];
+        #[cfg_attr(not(feature = "trace"), allow(unused_variables))]
        let device = &device_guard[sc.device_id.value];
        let suf = B::get_surface_mut(surface);

View File

@@ -10,6 +10,11 @@ use serde::Deserialize;
 use serde::Serialize;

 use std::{io, ptr, slice};

+/// Buffer-Texture copies on command encoders have to have the `bytes_per_row`
+/// aligned to this number.
+///
+/// This doesn't apply to `Queue::write_texture`.
+pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;
 /// Bound uniform/storage buffer offsets must be aligned to this number.
 pub const BIND_BUFFER_ALIGNMENT: u64 = 256;
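Since `COPY_BYTES_PER_ROW_ALIGNMENT` is now a public constant, callers can round an arbitrary row size up to it before recording an encoder copy, for example when reading a texture back into a buffer. A minimal sketch; the `pad_bytes_per_row` helper is illustrative, not part of wgpu-types:

// Rounds an unpadded row size up to the next multiple of
// COPY_BYTES_PER_ROW_ALIGNMENT (256 bytes).
fn pad_bytes_per_row(unpadded: u32) -> u32 {
    let align = COPY_BYTES_PER_ROW_ALIGNMENT;
    (unpadded + align - 1) / align * align
}

fn main() {
    assert_eq!(pad_bytes_per_row(400), 512); // e.g. 100 RGBA8 texels per row
    assert_eq!(pad_bytes_per_row(256), 256); // already aligned
    assert_eq!(pad_bytes_per_row(1), 256);
}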