diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index d3d0e67704..d9a613fe1d 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -239,10 +239,11 @@ impl Global {
             .bits as u32
             / BITS_PER_BYTE;
 
-        let stage_bytes_per_row = get_lowest_common_denom(
+        let bytes_per_row_alignment = get_lowest_common_denom(
             device.hal_limits.optimal_buffer_copy_pitch_alignment as u32,
             bytes_per_texel,
         );
+        let stage_bytes_per_row = align_to(bytes_per_texel * size.width, bytes_per_row_alignment);
         let stage_size = stage_bytes_per_row as u64
             * ((size.depth - 1) * data_layout.rows_per_image + size.height) as u64;
         let mut stage = device.prepare_stage(stage_size);
@@ -532,6 +533,23 @@ fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
     }
 }
 
+/// Rounds `value` up to the nearest multiple of `alignment`.
+///
+/// A `value` that is already a multiple of `alignment` is returned unchanged.
+///
+/// # Panics
+///
+/// Panics if `alignment` is zero, because the remainder is undefined.
+fn align_to(value: u32, alignment: u32) -> u32 {
+    let remainder = value % alignment;
+    if remainder == 0 {
+        value
+    } else {
+        // Step up by exactly the distance to the next multiple.
+        value + (alignment - remainder)
+    }
+}
+
 #[test]
 fn test_lcd() {
     assert_eq!(get_lowest_common_denom(2, 2), 2);