hal/dx12: bind group creation

This commit is contained in:
Dzmitry Malyshau
2021-07-11 11:27:39 -04:00
parent 7d29a3b70e
commit 040a602b69
4 changed files with 317 additions and 37 deletions

View File

@@ -557,9 +557,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
// Bind CBV/SRV/UAV descriptor tables
if info.tables.contains(super::TableTypes::SRV_CBV_UAV) {
let descriptor = group.handle_views.unwrap().gpu;
match self.pass.kind {
Pk::Render => list.set_graphics_root_descriptor_table(root_index, group.gpu_views),
Pk::Compute => list.set_compute_root_descriptor_table(root_index, group.gpu_views),
Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor),
Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor),
Pk::Transfer => (),
}
root_index += 1;
@@ -567,13 +568,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
// Bind Sampler descriptor tables.
if info.tables.contains(super::TableTypes::SAMPLERS) {
let descriptor = group.handle_samplers.unwrap().gpu;
match self.pass.kind {
Pk::Render => {
list.set_graphics_root_descriptor_table(root_index, group.gpu_samplers)
}
Pk::Compute => {
list.set_compute_root_descriptor_table(root_index, group.gpu_samplers)
}
Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor),
Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor),
Pk::Transfer => (),
}
root_index += 1;

View File

@@ -9,15 +9,26 @@ const HEAP_SIZE_FIXED: usize = 64;
#[derive(Copy, Clone)]
pub(super) struct DualHandle {
cpu: native::CpuDescriptor,
gpu: native::GpuDescriptor,
pub gpu: native::GpuDescriptor,
/// How large the block allocated to this handle is.
size: u64,
count: u64,
}
/// Manual `Debug` that prints the raw pointer values of both descriptors
/// plus the handle count. The `size` field is intentionally not printed.
impl fmt::Debug for DualHandle {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DualHandle")
.field("cpu", &self.cpu.ptr)
.field("gpu", &self.gpu.ptr)
.field("count", &self.count)
.finish()
}
}
type DescriptorIndex = u64;
pub(super) struct GeneralHeap {
pub raw: native::DescriptorHeap,
ty: native::DescriptorHeapType,
handle_size: u64,
total_handles: u64,
start: DualHandle,
@@ -27,13 +38,13 @@ pub(super) struct GeneralHeap {
impl GeneralHeap {
pub(super) fn new(
device: native::Device,
raw_type: native::DescriptorHeapType,
ty: native::DescriptorHeapType,
total_handles: u64,
) -> Result<Self, crate::DeviceError> {
let raw = device
.create_descriptor_heap(
total_handles as u32,
raw_type,
ty,
native::DescriptorHeapFlags::SHADER_VISIBLE,
0,
)
@@ -41,37 +52,53 @@ impl GeneralHeap {
Ok(Self {
raw,
handle_size: device.get_descriptor_increment_size(raw_type) as u64,
ty,
handle_size: device.get_descriptor_increment_size(ty) as u64,
total_handles,
start: DualHandle {
cpu: raw.start_cpu_descriptor(),
gpu: raw.start_gpu_descriptor(),
size: 0,
count: 0,
},
ranges: Mutex::new(RangeAllocator::new(0..total_handles)),
})
}
pub(super) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle {
pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle {
assert!(index < self.total_handles);
DualHandle {
cpu: self.cpu_descriptor_at(index),
gpu: self.gpu_descriptor_at(index),
size,
count,
}
}
pub(super) fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor {
fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor {
native::CpuDescriptor {
ptr: self.start.cpu.ptr + (self.handle_size * index) as usize,
}
}
pub(super) fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor {
fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor {
native::GpuDescriptor {
ptr: self.start.gpu.ptr + self.handle_size * index,
}
}
/// Reserve `count` contiguous descriptor slots from this heap and return
/// the index of the first one.
///
/// Returns `DeviceError::OutOfMemory` (after logging the underlying
/// allocator error) when no free range of that size exists.
pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> {
let range = self.ranges.lock().allocate_range(count).map_err(|err| {
log::error!("Unable to allocate descriptors: {:?}", err);
crate::DeviceError::OutOfMemory
})?;
Ok(range.start)
}
/// Free handles previously given out by this heap's `allocate_slice`.
/// Do not use this with handles not given out by this heap.
pub(crate) fn free_slice(&self, handle: DualHandle) {
// Recover the starting slot index from the GPU pointer offset;
// the handle stores pointers and a count, not the index itself.
let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size;
self.ranges.lock().free_range(start..start + handle.count);
}
}
/// Fixed-size free-list allocator for CPU descriptors.
@@ -199,3 +226,81 @@ impl CpuPool {
}
}
}
/// Mutable interior of a [`CpuHeap`]: the raw descriptor heap plus a
/// `stage` list of CPU descriptors accumulated for a later copy into a
/// shader-visible heap (see `upload`).
pub(super) struct CpuHeapInner {
pub raw: native::DescriptorHeap,
pub stage: Vec<native::CpuDescriptor>,
}
/// Fixed-size, CPU-only (non-shader-visible) descriptor heap.
pub(super) struct CpuHeap {
pub inner: Mutex<CpuHeapInner>,
// Descriptor of slot 0; per-slot addresses are derived from it in `at`.
start: native::CpuDescriptor,
// Increment between consecutive descriptors, queried from the device.
handle_size: u32,
total: u32,
}
// SAFETY(review): presumably sound because the raw heap is only mutated
// through the `Mutex`-guarded `inner`; `start`/`handle_size`/`total` are
// immutable after construction — confirm against `native` type guarantees.
unsafe impl Send for CpuHeap {}
unsafe impl Sync for CpuHeap {}
impl CpuHeap {
/// Create a CPU-only descriptor heap of type `ty` with `total` slots.
///
/// The heap is created without `SHADER_VISIBLE`, so its descriptors can
/// only serve as copy sources (staging), not for direct GPU binding.
pub(super) fn new(
device: native::Device,
ty: native::DescriptorHeapType,
total: u32,
) -> Result<Self, crate::DeviceError> {
let handle_size = device.get_descriptor_increment_size(ty);
let raw = device
.create_descriptor_heap(total, ty, native::DescriptorHeapFlags::empty(), 0)
.into_device_result("CPU descriptor heap creation")?;
Ok(Self {
inner: Mutex::new(CpuHeapInner {
raw,
stage: Vec::new(),
}),
start: raw.start_cpu_descriptor(),
handle_size,
total,
})
}
/// CPU descriptor for slot `index`.
pub(super) fn at(&self, index: u32) -> native::CpuDescriptor {
// Mirror the bounds assertion in `GeneralHeap::at`: an out-of-range
// index would otherwise silently produce a pointer past the heap.
assert!(index < self.total);
native::CpuDescriptor {
ptr: self.start.ptr + (self.handle_size * index) as usize,
}
}
/// Destroy the underlying D3D12 heap.
///
/// # Safety
/// Callers must ensure the heap's descriptors are no longer referenced.
pub(super) unsafe fn destroy(self) {
self.inner.into_inner().raw.destroy();
}
}
/// Manual `Debug` printing the start pointer, handle size, and capacity.
/// The `inner` heap and staging list are intentionally not printed.
impl fmt::Debug for CpuHeap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("CpuHeap")
.field("start", &self.start.ptr)
.field("handle_size", &self.handle_size)
.field("total", &self.total)
.finish()
}
}
/// Copy the staged CPU descriptors of `src` into freshly allocated slots
/// of the shader-visible heap `dst`, returning the resulting `DualHandle`.
///
/// `dummy_copy_counts` is a slice of 1's at least `src.stage.len()` long
/// (built in `create_bind_group_layout`), serving as the per-source-range
/// sizes: each staged descriptor is its own one-element source range.
pub(super) unsafe fn upload(
device: native::Device,
src: &CpuHeapInner,
dst: &GeneralHeap,
dummy_copy_counts: &[u32],
) -> Result<DualHandle, crate::DeviceError> {
let count = src.stage.len() as u32;
// Reserve a contiguous block in the GPU-visible heap.
let index = dst.allocate_slice(count as u64)?;
device.CopyDescriptors(
// one destination range of `count` descriptors...
1,
&dst.cpu_descriptor_at(index),
&count,
// ...filled from `count` single-descriptor source ranges.
count,
src.stage.as_ptr(),
dummy_copy_counts.as_ptr(),
dst.ty as u32,
);
Ok(dst.at(index, count as u64))
}

View File

@@ -825,13 +825,56 @@ impl crate::Device<super::Api> for super::Device {
&self,
desc: &crate::BindGroupLayoutDescriptor,
) -> Result<super::BindGroupLayout, crate::DeviceError> {
let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0);
for entry in desc.entries.iter() {
match entry.ty {
wgt::BindingType::Buffer {
has_dynamic_offset: true,
..
} => {}
wgt::BindingType::Buffer { .. } => num_buffer_views += 1,
wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => {
num_texture_views += 1
}
wgt::BindingType::Sampler { .. } => num_samplers += 1,
}
}
let num_views = num_buffer_views + num_texture_views;
Ok(super::BindGroupLayout {
entries: desc.entries.to_vec(),
cpu_heap_views: if num_views != 0 {
let heap = descriptor::CpuHeap::new(
self.raw,
native::DescriptorHeapType::CbvSrvUav,
num_views,
)?;
Some(heap)
} else {
None
},
cpu_heap_samplers: if num_samplers != 0 {
let heap = descriptor::CpuHeap::new(
self.raw,
native::DescriptorHeapType::Sampler,
num_samplers,
)?;
Some(heap)
} else {
None
},
copy_counts: vec![1; num_views.max(num_samplers) as usize],
})
}
unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {
// just drop
unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) {
if let Some(cpu_heap) = bg_layout.cpu_heap_views {
cpu_heap.destroy();
}
if let Some(cpu_heap) = bg_layout.cpu_heap_samplers {
cpu_heap.destroy();
}
}
unsafe fn create_pipeline_layout(
&self,
desc: &crate::PipelineLayoutDescriptor<super::Api>,
@@ -1054,13 +1097,151 @@ impl crate::Device<super::Api> for super::Device {
&self,
desc: &crate::BindGroupDescriptor<super::Api>,
) -> Result<super::BindGroup, crate::DeviceError> {
let mut cpu_views = desc
.layout
.cpu_heap_views
.as_ref()
.map(|cpu_heap| cpu_heap.inner.lock());
if let Some(ref mut inner) = cpu_views {
inner.stage.clear();
}
let mut cpu_samplers = desc
.layout
.cpu_heap_samplers
.as_ref()
.map(|cpu_heap| cpu_heap.inner.lock());
if let Some(ref mut inner) = cpu_samplers {
inner.stage.clear();
}
let mut dynamic_buffers = Vec::new();
for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) {
match layout.ty {
wgt::BindingType::Buffer {
has_dynamic_offset,
ty,
..
} => {
let data = &desc.buffers[entry.resource_index as usize];
let gpu_address = data.resolve_address();
let size = data.resolve_size() as u32;
let inner = cpu_views.as_mut().unwrap();
let cpu_index = inner.stage.len() as u32;
let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index);
match ty {
_ if has_dynamic_offset => {
dynamic_buffers.push(gpu_address);
}
wgt::BufferBindingType::Uniform => {
let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1;
let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC {
BufferLocation: gpu_address,
SizeInBytes: size,
};
self.raw.CreateConstantBufferView(&raw_desc, handle);
}
wgt::BufferBindingType::Storage { read_only: true } => {
let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER,
u: mem::zeroed(),
};
*raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV {
FirstElement: data.offset,
NumElements: size / 4,
StructureByteStride: 0,
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW,
};
self.raw.CreateShaderResourceView(
data.buffer.resource.as_mut_ptr(),
&raw_desc,
handle,
);
}
wgt::BufferBindingType::Storage { read_only: false } => {
let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER,
u: mem::zeroed(),
};
*raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV {
FirstElement: data.offset,
NumElements: size / 4,
StructureByteStride: 0,
CounterOffsetInBytes: 0,
Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW,
};
self.raw.CreateUnorderedAccessView(
data.buffer.resource.as_mut_ptr(),
ptr::null_mut(),
&raw_desc,
handle,
);
}
}
inner.stage.push(handle);
}
wgt::BindingType::Texture { .. }
| wgt::BindingType::StorageTexture {
access: wgt::StorageTextureAccess::ReadOnly,
..
} => {
let data = &desc.textures[entry.resource_index as usize];
let handle = data.view.handle_srv.unwrap();
cpu_views.as_mut().unwrap().stage.push(handle.raw);
}
wgt::BindingType::StorageTexture { .. } => {
let data = &desc.textures[entry.resource_index as usize];
let handle = data.view.handle_uav.unwrap();
cpu_views.as_mut().unwrap().stage.push(handle.raw);
}
wgt::BindingType::Sampler { .. } => {
let data = &desc.samplers[entry.resource_index as usize];
cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw);
}
}
}
let handle_views = match cpu_views {
Some(inner) => {
let dual = descriptor::upload(
self.raw,
&*inner,
&self.shared.heap_views,
&desc.layout.copy_counts,
)?;
Some(dual)
}
None => None,
};
let handle_samplers = match cpu_samplers {
Some(inner) => {
let dual = descriptor::upload(
self.raw,
&*inner,
&self.shared.heap_samplers,
&desc.layout.copy_counts,
)?;
Some(dual)
}
None => None,
};
Ok(super::BindGroup {
gpu_views: unimplemented!(),
gpu_samplers: unimplemented!(),
dynamic_buffers: Vec::new(),
handle_views,
handle_samplers,
dynamic_buffers,
})
}
unsafe fn destroy_bind_group(&self, group: super::BindGroup) {}
unsafe fn destroy_bind_group(&self, group: super::BindGroup) {
if let Some(dual) = group.handle_views {
let _ = self.shared.heap_views.free_slice(dual);
}
if let Some(dual) = group.handle_samplers {
let _ = self.shared.heap_samplers.free_slice(dual);
}
}
unsafe fn create_shader_module(
&self,

View File

@@ -16,7 +16,7 @@ mod instance;
use arrayvec::ArrayVec;
use parking_lot::Mutex;
use std::{borrow::Cow, fmt, mem, ptr, sync::Arc};
use std::{borrow::Cow, mem, ptr, sync::Arc};
use winapi::{
shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror},
um::{d3d12, synchapi, winbase, winnt},
@@ -147,6 +147,8 @@ pub struct Adapter {
device: native::Device,
library: Arc<native::D3D12Lib>,
private_caps: PrivateCapabilities,
//Note: this isn't used right now, but we'll need it later.
#[allow(unused)]
workarounds: Workarounds,
}
@@ -382,6 +384,9 @@ unsafe impl Sync for Fence {}
pub struct BindGroupLayout {
/// Sorted list of entries.
entries: Vec<wgt::BindGroupLayoutEntry>,
// Staging heap for CBV/SRV/UAV descriptors; `None` when the layout
// declares no buffer or texture views.
cpu_heap_views: Option<descriptor::CpuHeap>,
// Staging heap for sampler descriptors; `None` when there are none.
cpu_heap_samplers: Option<descriptor::CpuHeap>,
copy_counts: Vec<u32>, // all 1's
}
enum BufferViewKind {
@@ -390,22 +395,13 @@ enum BufferViewKind {
UnorderedAccess,
}
#[derive(Debug)]
pub struct BindGroup {
gpu_views: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
gpu_samplers: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
handle_views: Option<descriptor::DualHandle>,
handle_samplers: Option<descriptor::DualHandle>,
dynamic_buffers: Vec<native::GpuAddress>,
}
impl fmt::Debug for BindGroup {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("BindGroup")
.field("gpu_views", &self.gpu_views.ptr)
.field("gpu_samplers", &self.gpu_samplers.ptr)
.field("dynamic_buffers", &self.dynamic_buffers)
.finish()
}
}
bitflags::bitflags! {
struct TableTypes: u8 {
const SRV_CBV_UAV = 0x1;