diff --git a/player/src/main.rs b/player/src/main.rs
index 94aecbfc79..4fcdf9b96e 100644
--- a/player/src/main.rs
+++ b/player/src/main.rs
@@ -411,12 +411,22 @@ impl GlobalExt for wgc::hub::Global {
                 let bin = std::fs::read(dir.join(data)).unwrap();
                 let size = (range.end - range.start) as usize;
                 if queued {
-                    self.queue_write_buffer::<B>(device, &bin, id, range.start);
+                    self.queue_write_buffer::<B>(device, id, range.start, &bin);
                 } else {
                     self.device_wait_for_buffer::<B>(device, id);
                     self.device_set_buffer_sub_data::<B>(device, id, range.start, &bin[..size]);
                 }
             }
+            A::WriteTexture {
+                to,
+                data,
+                layout,
+                size,
+            } => {
+                // Read the recorded bytes back from the file named by `data`.
+                let bin = std::fs::read(dir.join(data)).unwrap();
+                self.queue_write_texture::<B>(device, &to, &bin, &layout, size);
+            }
             A::Submit(_index, commands) => {
                 let encoder = self.device_create_command_encoder::<B>(
                     device,
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs
index 3d19246132..979b8ffa46 100644
--- a/wgpu-core/src/command/transfer.rs
+++ b/wgpu-core/src/command/transfer.rs
@@ -17,7 +17,24 @@ use wgt::{BufferAddress, BufferUsage, Extent3d, Origin3d, TextureUsage};
 
 use std::iter;
 
-const BITS_PER_BYTE: u32 = 8;
+pub(crate) const BITS_PER_BYTE: u32 = 8;
+
+/// Layout of texel data within a linear byte source, such as the `data` slice
+/// given to `queue_write_texture`.
+#[repr(C)]
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "trace", derive(serde::Serialize))]
+#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
+pub struct TextureDataLayout {
+    /// Offset, in bytes, from the start of the source to the first texel.
+    pub offset: BufferAddress,
+    /// Bytes per row of the source; must be a multiple of the texel size.
+    pub bytes_per_row: u32,
+    /// Number of rows per image, passed to the backend as the buffer height.
+    pub rows_per_image: u32,
+}
+
+//TODO: use `TextureDataLayout` internally
 
 #[repr(C)]
 #[derive(Clone, Debug)]
@@ -44,7 +61,10 @@ pub struct TextureCopyView {
 impl TextureCopyView {
     //TODO: we currently access each texture twice for a transfer,
     // once only to get the aspect flags, which is unfortunate.
-    fn to_selector(&self, aspects: hal::format::Aspects) -> hal::image::SubresourceRange {
+    pub(crate) fn to_selector(
+        &self,
+        aspects: hal::format::Aspects,
+    ) -> hal::image::SubresourceRange {
         let level = self.mip_level as hal::image::Level;
         let layer = self.array_layer as hal::image::Layer;
 
@@ -60,7 +80,10 @@ impl TextureCopyView {
         }
     }
 
-    fn to_sub_layers(&self, aspects: hal::format::Aspects) -> hal::image::SubresourceLayers {
+    pub(crate) fn to_sub_layers(
+        &self,
+        aspects: hal::format::Aspects,
+    ) -> hal::image::SubresourceLayers {
         let layer = self.array_layer as hal::image::Layer;
         // TODO: Can't satisfy clippy here unless we modify
         // `hal::image::SubresourceLayers` in gfx to use
@@ -138,7 +161,7 @@ impl Global {
         let cmb_raw = cmb.raw.last_mut().unwrap();
         unsafe {
             cmb_raw.pipeline_barrier(
-                all_buffer_stages()..all_buffer_stages(),
+                all_buffer_stages()..hal::pso::PipelineStage::TRANSFER,
                 hal::memory::Dependencies::empty(),
                 barriers,
             );
@@ -210,10 +233,9 @@ impl Global {
             image_extent: conv::map_extent(copy_size),
         };
         let cmb_raw = cmb.raw.last_mut().unwrap();
-        let stages = all_buffer_stages() | all_image_stages();
         unsafe {
             cmb_raw.pipeline_barrier(
-                stages..stages,
+                all_buffer_stages() | all_image_stages()..hal::pso::PipelineStage::TRANSFER,
                 hal::memory::Dependencies::empty(),
                 src_barriers.chain(dst_barriers),
             );
@@ -298,10 +320,9 @@ impl Global {
             image_extent: conv::map_extent(copy_size),
         };
         let cmb_raw = cmb.raw.last_mut().unwrap();
-        let stages = all_buffer_stages() | all_image_stages();
         unsafe {
             cmb_raw.pipeline_barrier(
-                stages..stages,
+                all_buffer_stages() | all_image_stages()..hal::pso::PipelineStage::TRANSFER,
                 hal::memory::Dependencies::empty(),
                 src_barriers.chain(dst_barrier),
             );
@@ -380,7 +401,7 @@ impl Global {
         let cmb_raw = cmb.raw.last_mut().unwrap();
         unsafe {
             cmb_raw.pipeline_barrier(
-                all_image_stages()..all_image_stages(),
+                all_image_stages()..hal::pso::PipelineStage::TRANSFER,
                 hal::memory::Dependencies::empty(),
                 barriers,
             );
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index f8cdfb8eb5..aebcc473cc 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -5,10 +5,11 @@
 #[cfg(feature = "trace")]
 use crate::device::trace::Action;
 use crate::{
-    command::{CommandAllocator, CommandBuffer},
+    command::{CommandAllocator, CommandBuffer, TextureCopyView, TextureDataLayout, BITS_PER_BYTE},
+    conv,
     hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token},
     id,
-    resource::{BufferMapState, BufferUse},
+    resource::{BufferMapState, BufferUse, TextureUse},
 };
 
 use gfx_memory::{Block, Heaps, MemoryBlock};
@@ -48,13 +49,15 @@ impl PendingWrites {
     }
 }
 
+//TODO: move out common parts of write_xxx.
+
 impl<G: GlobalIdentityHandlerFactory> Global<G> {
     pub fn queue_write_buffer<B: GfxBackend>(
         &self,
         queue_id: id::QueueId,
-        data: &[u8],
         buffer_id: id::BufferId,
         buffer_offset: wgt::BufferAddress,
+        data: &[u8],
     ) {
         let hub = B::hub(self);
         let mut token = Token::root();
@@ -164,6 +167,181 @@ impl Global {
         device.pending_writes.command_buffer = Some(comb);
     }
 
+    /// Schedules a write of `data` into a region of the `destination` texture.
+    ///
+    /// The bytes are copied into a temporary staging buffer and a buffer-to-image
+    /// copy is recorded into the device's pending-writes command buffer. The
+    /// staging buffer is then stored in `pending_writes.temp_buffers`.
+    ///
+    /// `data_layout` describes how the texels are arranged inside `data`: they
+    /// start at `data_layout.offset`, `bytes_per_row` is converted to a row width
+    /// in texels, and `rows_per_image` is passed on as the source image height.
+    /// `size` is the extent of the destination region, which starts at
+    /// `destination.origin`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the destination texture lacks the `COPY_DST` usage, if
+    /// `data_layout.offset` lies beyond the end of `data`, if `bytes_per_row` is
+    /// not a multiple of the texel size, or if creating, allocating, binding or
+    /// mapping the staging buffer fails.
+    pub fn queue_write_texture<B: GfxBackend>(
+        &self,
+        queue_id: id::QueueId,
+        destination: &TextureCopyView,
+        data: &[u8],
+        data_layout: &TextureDataLayout,
+        size: wgt::Extent3d,
+    ) {
+        let hub = B::hub(self);
+        let mut token = Token::root();
+        let (mut device_guard, mut token) = hub.devices.write(&mut token);
+        let device = &mut device_guard[queue_id];
+        let (texture_guard, _) = hub.textures.read(&mut token);
+        let aspects = texture_guard[destination.texture].full_range.aspects;
+
+        // Record the full, unsliced data together with its layout, so that a
+        // replay passes the same arguments to this function.
+        #[cfg(feature = "trace")]
+        match device.trace {
+            Some(ref trace) => {
+                let mut trace = trace.lock();
+                let data_path = trace.make_binary("bin", data);
+                trace.add(Action::WriteTexture {
+                    to: destination.clone(),
+                    data: data_path,
+                    layout: data_layout.clone(),
+                    size,
+                });
+            }
+            None => {}
+        }
+
+        // Only the bytes from `data_layout.offset` onwards are staged, so the
+        // copy region below always starts at offset 0 of the staging buffer.
+        assert!(
+            data_layout.offset <= data.len() as wgt::BufferAddress,
+            "Data layout offset ({}) is out of bounds of the source data ({} bytes)",
+            data_layout.offset,
+            data.len()
+        );
+        let staged = &data[data_layout.offset as usize..];
+
+        let mut trackers = device.trackers.lock();
+        let (dst, transition) = trackers.textures.use_replace(
+            &*texture_guard,
+            destination.texture,
+            destination.to_selector(aspects),
+            TextureUse::COPY_DST,
+        );
+        assert!(
+            dst.usage.contains(wgt::TextureUsage::COPY_DST),
+            "Write texture usage {:?} must contain usage flag COPY_DST",
+            dst.usage
+        );
+
+        // Tag the texture with the index that follows the last submission.
+        let last_submit_index = device.life_guard.submission_index.load(Ordering::Relaxed);
+        dst.life_guard.use_at(last_submit_index + 1);
+
+        let mut src_raw = unsafe {
+            device
+                .raw
+                .create_buffer(
+                    staged.len() as wgt::BufferAddress,
+                    hal::buffer::Usage::TRANSFER_SRC,
+                )
+                .unwrap()
+        };
+        //TODO: do we need to transition into HOST_WRITE access first?
+        let requirements = unsafe { device.raw.get_buffer_requirements(&src_raw) };
+
+        let mut memory = device
+            .mem_allocator
+            .lock()
+            .allocate(
+                &device.raw,
+                requirements.type_mask as u32,
+                gfx_memory::MemoryUsage::Staging { read_back: false },
+                gfx_memory::Kind::Linear,
+                requirements.size,
+                requirements.alignment,
+            )
+            .unwrap();
+        unsafe {
+            device
+                .raw
+                .set_buffer_name(&mut src_raw, "<write_texture_temp>");
+            device
+                .raw
+                .bind_buffer_memory(memory.memory(), memory.segment().offset, &mut src_raw)
+                .unwrap();
+        }
+
+        let mut mapped = memory.map(&device.raw, hal::memory::Segment::ALL).unwrap();
+        unsafe { mapped.write(&device.raw, hal::memory::Segment::ALL) }
+            .unwrap()
+            .slice[..staged.len()]
+            .copy_from_slice(staged);
+
+        // Reuse the pending-writes command buffer if there is one, else begin a new one.
+        let mut comb = match device.pending_writes.command_buffer.take() {
+            Some(comb) => comb,
+            None => {
+                let mut comb = device.com_allocator.allocate_internal();
+                unsafe {
+                    comb.begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT);
+                }
+                comb
+            }
+        };
+
+        let bytes_per_texel = conv::map_texture_format(dst.format, device.private_features)
+            .surface_desc()
+            .bits as u32
+            / BITS_PER_BYTE;
+        // Validate before dividing: a row must hold a whole number of texels.
+        assert_eq!(
+            data_layout.bytes_per_row % bytes_per_texel,
+            0,
+            "Source bytes per row ({}) must be a multiple of bytes per texel ({})",
+            data_layout.bytes_per_row,
+            bytes_per_texel
+        );
+        let buffer_width = data_layout.bytes_per_row / bytes_per_texel;
+        let region = hal::command::BufferImageCopy {
+            buffer_offset: 0,
+            buffer_width,
+            buffer_height: data_layout.rows_per_image,
+            image_layers: destination.to_sub_layers(aspects),
+            image_offset: conv::map_origin(destination.origin),
+            image_extent: conv::map_extent(size),
+        };
+        unsafe {
+            comb.pipeline_barrier(
+                super::all_image_stages() | hal::pso::PipelineStage::HOST
+                    ..hal::pso::PipelineStage::TRANSFER,
+                hal::memory::Dependencies::empty(),
+                iter::once(hal::memory::Barrier::Buffer {
+                    states: hal::buffer::Access::HOST_WRITE..hal::buffer::Access::TRANSFER_READ,
+                    target: &src_raw,
+                    range: hal::buffer::SubRange::WHOLE,
+                    families: None,
+                })
+                .chain(transition.map(|pending| pending.into_hal(dst))),
+            );
+            comb.copy_buffer_to_image(
+                &src_raw,
+                &dst.raw,
+                hal::image::Layout::TransferDstOptimal,
+                iter::once(region),
+            );
+        }
+        // Hand the staging buffer and its memory over to `pending_writes`.
+        device.pending_writes.temp_buffers.push((src_raw, memory));
+        device.pending_writes.command_buffer = Some(comb);
+    }
+
     pub fn queue_submit(
         &self,
         queue_id: id::QueueId,
diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs
index b3afbe6703..d7ebc2625a 100644
--- a/wgpu-core/src/device/trace.rs
+++ b/wgpu-core/src/device/trace.rs
@@ -3,7 +3,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use crate::{
-    command::{BufferCopyView, TextureCopyView},
+    command::{BufferCopyView, TextureCopyView, TextureDataLayout},
     id,
 };
 #[cfg(feature = "trace")]
@@ -168,6 +168,14 @@ pub enum Action {
         range: Range<wgt::BufferAddress>,
         queued: bool,
     },
+    /// A recorded `queue_write_texture` call; `data` names the binary file
+    /// that holds the written bytes.
+    WriteTexture {
+        to: TextureCopyView,
+        data: FileName,
+        layout: TextureDataLayout,
+        size: wgt::Extent3d,
+    },
     Submit(crate::SubmissionIndex, Vec<Command>),
 }
 