From 040a602b697dca127893ce3df1de3233046303d6 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 11 Jul 2021 11:27:39 -0400 Subject: [PATCH] hal/dx12: bind group creation --- wgpu-hal/src/dx12/command.rs | 14 +-- wgpu-hal/src/dx12/descriptor.rs | 125 +++++++++++++++++++-- wgpu-hal/src/dx12/device.rs | 193 +++++++++++++++++++++++++++++++- wgpu-hal/src/dx12/mod.rs | 22 ++-- 4 files changed, 317 insertions(+), 37 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index a8f31ca0e6..bec9a743b6 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -557,9 +557,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // Bind CBV/SRC/UAV descriptor tables if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + let descriptor = group.handle_views.unwrap().gpu; match self.pass.kind { - Pk::Render => list.set_graphics_root_descriptor_table(root_index, group.gpu_views), - Pk::Compute => list.set_compute_root_descriptor_table(root_index, group.gpu_views), + Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor), Pk::Transfer => (), } root_index += 1; @@ -567,13 +568,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // Bind Sampler descriptor tables. 
if info.tables.contains(super::TableTypes::SAMPLERS) { + let descriptor = group.handle_samplers.unwrap().gpu; match self.pass.kind { - Pk::Render => { - list.set_graphics_root_descriptor_table(root_index, group.gpu_samplers) - } - Pk::Compute => { - list.set_compute_root_descriptor_table(root_index, group.gpu_samplers) - } + Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor), Pk::Transfer => (), } root_index += 1; diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index a1db54aa54..da8ce6786b 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -9,15 +9,26 @@ const HEAP_SIZE_FIXED: usize = 64; #[derive(Copy, Clone)] pub(super) struct DualHandle { cpu: native::CpuDescriptor, - gpu: native::GpuDescriptor, + pub gpu: native::GpuDescriptor, /// How large the block allocated to this handle is. - size: u64, + count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } } type DescriptorIndex = u64; pub(super) struct GeneralHeap { pub raw: native::DescriptorHeap, + ty: native::DescriptorHeapType, handle_size: u64, total_handles: u64, start: DualHandle, @@ -27,13 +38,13 @@ pub(super) struct GeneralHeap { impl GeneralHeap { pub(super) fn new( device: native::Device, - raw_type: native::DescriptorHeapType, + ty: native::DescriptorHeapType, total_handles: u64, ) -> Result { let raw = device .create_descriptor_heap( total_handles as u32, - raw_type, + ty, native::DescriptorHeapFlags::SHADER_VISIBLE, 0, ) @@ -41,37 +52,53 @@ impl GeneralHeap { Ok(Self { raw, - handle_size: device.get_descriptor_increment_size(raw_type) as u64, + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, total_handles, start: 
DualHandle { cpu: raw.start_cpu_descriptor(), gpu: raw.start_gpu_descriptor(), - size: 0, + count: 0, }, ranges: Mutex::new(RangeAllocator::new(0..total_handles)), }) } - pub(super) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle { + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { assert!(index < self.total_handles); DualHandle { cpu: self.cpu_descriptor_at(index), gpu: self.gpu_descriptor_at(index), - size, + count, } } - pub(super) fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { + fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { native::CpuDescriptor { ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, } } - pub(super) fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { + fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { native::GpuDescriptor { ptr: self.start.gpu.ptr + self.handle_size * index, } } + + pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `GeneralHeap`. + /// Do not use this with handles not given out by this `GeneralHeap`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } } /// Fixed-size free-list allocator for CPU descriptors. 
@@ -199,3 +226,81 @@ impl CpuPool { } } } + +pub(super) struct CpuHeapInner { + pub raw: native::DescriptorHeap, + pub stage: Vec<native::CpuDescriptor>, +} + +pub(super) struct CpuHeap { + pub inner: Mutex<CpuHeapInner>, + start: native::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: native::Device, + ty: native::DescriptorHeapType, + total: u32, + ) -> Result<Self, crate::DeviceError> { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, native::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + raw, + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> native::CpuDescriptor { + native::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) unsafe fn destroy(self) { + self.inner.into_inner().raw.destroy(); + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: native::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result<DualHandle, crate::DeviceError> { + let count = src.stage.len() as u32; + let index = dst.allocate_slice(count as u64)?; + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ); + Ok(dst.at(index, count as u64)) +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 666fc687b0..ef43e0f84e 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -825,13 +825,56 @@ impl crate::Device for super::Device { &self, desc: 
&crate::BindGroupLayoutDescriptor, ) -> Result { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += 1, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += 1 + } + wgt::BindingType::Sampler { .. } => num_samplers += 1, + } + } + + let num_views = num_buffer_views + num_texture_views; Ok(super::BindGroupLayout { entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + native::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + native::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], }) } - unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) { - // just drop + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + if let Some(cpu_heap) = bg_layout.cpu_heap_views { + cpu_heap.destroy(); + } + if let Some(cpu_heap) = bg_layout.cpu_heap_samplers { + cpu_heap.destroy(); + } } + unsafe fn create_pipeline_layout( &self, desc: &crate::PipelineLayoutDescriptor, @@ -1054,13 +1097,151 @@ impl crate::Device for super::Device { &self, desc: &crate::BindGroupDescriptor, ) -> Result { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = 
Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset, + ty, + .. + } => { + let data = &desc.buffers[entry.resource_index as usize]; + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + _ if has_dynamic_offset => { + dynamic_buffers.push(gpu_address); + } + wgt::BufferBindingType::Uniform => { + let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: size, + }; + self.raw.CreateConstantBufferView(&raw_desc, handle); + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV { + FirstElement: data.offset, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW, + }; + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ); + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV { + FirstElement: data.offset, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW, + }; + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + 
ptr::null_mut(), + &raw_desc, + handle, + ); + } + } + inner.stage.push(handle); + } + wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::ReadOnly, + .. + } => { + let data = &desc.textures[entry.resource_index as usize]; + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + wgt::BindingType::StorageTexture { .. } => { + let data = &desc.textures[entry.resource_index as usize]; + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + wgt::BindingType::Sampler { .. } => { + let data = &desc.samplers[entry.resource_index as usize]; + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = descriptor::upload( + self.raw, + &*inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + )?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = descriptor::upload( + self.raw, + &*inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + )?; + Some(dual) + } + None => None, + }; + Ok(super::BindGroup { - gpu_views: unimplemented!(), - gpu_samplers: unimplemented!(), - dynamic_buffers: Vec::new(), + handle_views, + handle_samplers, + dynamic_buffers, }) } - unsafe fn destroy_bind_group(&self, group: super::BindGroup) {} + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + let _ = self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + let _ = self.shared.heap_samplers.free_slice(dual); + } + } unsafe fn create_shader_module( &self, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index d53793aed8..e08a6f1563 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -16,7 +16,7 @@ mod instance; use arrayvec::ArrayVec; use 
parking_lot::Mutex; -use std::{borrow::Cow, fmt, mem, ptr, sync::Arc}; +use std::{borrow::Cow, mem, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -147,6 +147,8 @@ pub struct Adapter { device: native::Device, library: Arc, private_caps: PrivateCapabilities, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] workarounds: Workarounds, } @@ -382,6 +384,9 @@ unsafe impl Sync for Fence {} pub struct BindGroupLayout { /// Sorted list of entries. entries: Vec, + cpu_heap_views: Option, + cpu_heap_samplers: Option, + copy_counts: Vec, // all 1's } enum BufferViewKind { @@ -390,22 +395,13 @@ enum BufferViewKind { UnorderedAccess, } +#[derive(Debug)] pub struct BindGroup { - gpu_views: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE, - gpu_samplers: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE, + handle_views: Option, + handle_samplers: Option, dynamic_buffers: Vec, } -impl fmt::Debug for BindGroup { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("BindGroup") - .field("gpu_views", &self.gpu_views.ptr) - .field("gpu_samplers", &self.gpu_samplers.ptr) - .field("dynamic_buffers", &self.dynamic_buffers) - .finish() - } -} - bitflags::bitflags! { struct TableTypes: u8 { const SRV_CBV_UAV = 0x1;