hal/dx12: bind group creation

This commit is contained in:
Dzmitry Malyshau
2021-07-11 11:27:39 -04:00
parent 7d29a3b70e
commit 040a602b69
4 changed files with 317 additions and 37 deletions

View File

@@ -557,9 +557,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
// Bind CBV/SRV/UAV descriptor tables
if info.tables.contains(super::TableTypes::SRV_CBV_UAV) {
let descriptor = group.handle_views.unwrap().gpu;
match self.pass.kind {
Pk::Render => list.set_graphics_root_descriptor_table(root_index, group.gpu_views),
Pk::Compute => list.set_compute_root_descriptor_table(root_index, group.gpu_views),
Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor),
Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor),
Pk::Transfer => (),
}
root_index += 1;
@@ -567,13 +568,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
// Bind Sampler descriptor tables.
if info.tables.contains(super::TableTypes::SAMPLERS) {
let descriptor = group.handle_samplers.unwrap().gpu;
match self.pass.kind {
Pk::Render => {
list.set_graphics_root_descriptor_table(root_index, group.gpu_samplers)
}
Pk::Compute => {
list.set_compute_root_descriptor_table(root_index, group.gpu_samplers)
}
Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor),
Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor),
Pk::Transfer => (),
}
root_index += 1;

View File

@@ -9,15 +9,26 @@ const HEAP_SIZE_FIXED: usize = 64;
#[derive(Copy, Clone)]
pub(super) struct DualHandle {
cpu: native::CpuDescriptor,
gpu: native::GpuDescriptor,
pub gpu: native::GpuDescriptor,
/// How large the block allocated to this handle is.
size: u64,
count: u64,
}
/// Manual `Debug` that prints the raw pointer values of both descriptors
/// plus the handle count. The `size` field is intentionally not printed.
impl fmt::Debug for DualHandle {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DualHandle")
.field("cpu", &self.cpu.ptr)
.field("gpu", &self.gpu.ptr)
.field("count", &self.count)
.finish()
}
}
type DescriptorIndex = u64;
pub(super) struct GeneralHeap {
pub raw: native::DescriptorHeap,
ty: native::DescriptorHeapType,
handle_size: u64,
total_handles: u64,
start: DualHandle,
@@ -27,13 +38,13 @@ pub(super) struct GeneralHeap {
impl GeneralHeap {
pub(super) fn new(
device: native::Device,
raw_type: native::DescriptorHeapType,
ty: native::DescriptorHeapType,
total_handles: u64,
) -> Result<Self, crate::DeviceError> {
let raw = device
.create_descriptor_heap(
total_handles as u32,
raw_type,
ty,
native::DescriptorHeapFlags::SHADER_VISIBLE,
0,
)
@@ -41,37 +52,53 @@ impl GeneralHeap {
Ok(Self {
raw,
handle_size: device.get_descriptor_increment_size(raw_type) as u64,
ty,
handle_size: device.get_descriptor_increment_size(ty) as u64,
total_handles,
start: DualHandle {
cpu: raw.start_cpu_descriptor(),
gpu: raw.start_gpu_descriptor(),
size: 0,
count: 0,
},
ranges: Mutex::new(RangeAllocator::new(0..total_handles)),
})
}
pub(super) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle {
pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle {
assert!(index < self.total_handles);
DualHandle {
cpu: self.cpu_descriptor_at(index),
gpu: self.gpu_descriptor_at(index),
size,
count,
}
}
pub(super) fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor {
fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor {
native::CpuDescriptor {
ptr: self.start.cpu.ptr + (self.handle_size * index) as usize,
}
}
pub(super) fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor {
fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor {
native::GpuDescriptor {
ptr: self.start.gpu.ptr + self.handle_size * index,
}
}
/// Reserve `count` contiguous descriptor slots from this heap and return
/// the index of the first one.
///
/// Returns `DeviceError::OutOfMemory` (after logging the underlying
/// allocator error) when no free range of that size exists.
pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> {
let range = self.ranges.lock().allocate_range(count).map_err(|err| {
log::error!("Unable to allocate descriptors: {:?}", err);
crate::DeviceError::OutOfMemory
})?;
Ok(range.start)
}
/// Free handles previously given out by this heap's `allocate_slice`.
/// Do not use this with handles not given out by this heap.
pub(crate) fn free_slice(&self, handle: DualHandle) {
// Recover the starting slot index from the GPU pointer offset;
// the handle stores pointers and a count, not the index itself.
let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size;
self.ranges.lock().free_range(start..start + handle.count);
}
}
/// Fixed-size free-list allocator for CPU descriptors.
@@ -199,3 +226,81 @@ impl CpuPool {
}
}
}
/// Mutable interior of a [`CpuHeap`]: the raw descriptor heap plus a
/// `stage` list of CPU descriptors accumulated for a later copy into a
/// shader-visible heap (see `upload`).
pub(super) struct CpuHeapInner {
pub raw: native::DescriptorHeap,
pub stage: Vec<native::CpuDescriptor>,
}
/// Fixed-size, CPU-only (non-shader-visible) descriptor heap.
pub(super) struct CpuHeap {
pub inner: Mutex<CpuHeapInner>,
// Descriptor of slot 0; per-slot addresses are derived from it in `at`.
start: native::CpuDescriptor,
// Increment between consecutive descriptors, queried from the device.
handle_size: u32,
total: u32,
}
// SAFETY(review): presumably sound because the raw heap is only mutated
// through the `Mutex`-guarded `inner`; `start`/`handle_size`/`total` are
// immutable after construction — confirm against `native` type guarantees.
unsafe impl Send for CpuHeap {}
unsafe impl Sync for CpuHeap {}
impl CpuHeap {
/// Create a CPU-only descriptor heap of type `ty` with `total` slots.
///
/// The heap is created without `SHADER_VISIBLE`, so its descriptors can
/// only serve as copy sources (staging), not for direct GPU binding.
pub(super) fn new(
device: native::Device,
ty: native::DescriptorHeapType,
total: u32,
) -> Result<Self, crate::DeviceError> {
let handle_size = device.get_descriptor_increment_size(ty);
let raw = device
.create_descriptor_heap(total, ty, native::DescriptorHeapFlags::empty(), 0)
.into_device_result("CPU descriptor heap creation")?;
Ok(Self {
inner: Mutex::new(CpuHeapInner {
raw,
stage: Vec::new(),
}),
start: raw.start_cpu_descriptor(),
handle_size,
total,
})
}
/// CPU descriptor for slot `index`.
pub(super) fn at(&self, index: u32) -> native::CpuDescriptor {
// Mirror the bounds assertion in `GeneralHeap::at`: an out-of-range
// index would otherwise silently produce a pointer past the heap.
assert!(index < self.total);
native::CpuDescriptor {
ptr: self.start.ptr + (self.handle_size * index) as usize,
}
}
/// Destroy the underlying D3D12 heap.
///
/// # Safety
/// Callers must ensure the heap's descriptors are no longer referenced.
pub(super) unsafe fn destroy(self) {
self.inner.into_inner().raw.destroy();
}
}
/// Manual `Debug` printing the start pointer, handle size, and capacity.
/// The `inner` heap and staging list are intentionally not printed.
impl fmt::Debug for CpuHeap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("CpuHeap")
.field("start", &self.start.ptr)
.field("handle_size", &self.handle_size)
.field("total", &self.total)
.finish()
}
}
/// Copy the staged CPU descriptors of `src` into freshly allocated slots
/// of the shader-visible heap `dst`, returning the resulting `DualHandle`.
///
/// `dummy_copy_counts` is a slice of 1's at least `src.stage.len()` long
/// (built in `create_bind_group_layout`), serving as the per-source-range
/// sizes: each staged descriptor is its own one-element source range.
pub(super) unsafe fn upload(
device: native::Device,
src: &CpuHeapInner,
dst: &GeneralHeap,
dummy_copy_counts: &[u32],
) -> Result<DualHandle, crate::DeviceError> {
let count = src.stage.len() as u32;
// Reserve a contiguous block in the GPU-visible heap.
let index = dst.allocate_slice(count as u64)?;
device.CopyDescriptors(
// one destination range of `count` descriptors...
1,
&dst.cpu_descriptor_at(index),
&count,
// ...filled from `count` single-descriptor source ranges.
count,
src.stage.as_ptr(),
dummy_copy_counts.as_ptr(),
dst.ty as u32,
);
Ok(dst.at(index, count as u64))
}

View File

@@ -825,13 +825,56 @@ impl crate::Device<super::Api> for super::Device {
&self,
desc: &crate::BindGroupLayoutDescriptor,
) -> Result<super::BindGroupLayout, crate::DeviceError> {
let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0);
for entry in desc.entries.iter() {
match entry.ty {
wgt::BindingType::Buffer {
has_dynamic_offset: true,
..
} => {}
wgt::BindingType::Buffer { .. } => num_buffer_views += 1,
wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => {
num_texture_views += 1
}
wgt::BindingType::Sampler { .. } => num_samplers += 1,
}
}
let num_views = num_buffer_views + num_texture_views;
Ok(super::BindGroupLayout {
entries: desc.entries.to_vec(),
cpu_heap_views: if num_views != 0 {
let heap = descriptor::CpuHeap::new(
self.raw,
native::DescriptorHeapType::CbvSrvUav,
num_views,
)?;
Some(heap)
} else {
None
},
cpu_heap_samplers: if num_samplers != 0 {
let heap = descriptor::CpuHeap::new(
self.raw,
native::DescriptorHeapType::Sampler,
num_samplers,
)?;
Some(heap)
} else {
None
},
copy_counts: vec![1; num_views.max(num_samplers) as usize],
})
}
unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {
// just drop
unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) {
if let Some(cpu_heap) = bg_layout.cpu_heap_views {
cpu_heap.destroy();
}
if let Some(cpu_heap) = bg_layout.cpu_heap_samplers {
cpu_heap.destroy();
}
}
unsafe fn create_pipeline_layout(
&self,
desc: &crate::PipelineLayoutDescriptor<super::Api>,
@@ -1054,13 +1097,151 @@ impl crate::Device<super::Api> for super::Device {
&self,
desc: &crate::BindGroupDescriptor<super::Api>,
) -> Result<super::BindGroup, crate::DeviceError> {
let mut cpu_views = desc
.layout
.cpu_heap_views
.as_ref()
.map(|cpu_heap| cpu_heap.inner.lock());
if let Some(ref mut inner) = cpu_views {
inner.stage.clear();
}
let mut cpu_samplers = desc
.layout
.cpu_heap_samplers
.as_ref()
.map(|cpu_heap| cpu_heap.inner.lock());
if let Some(ref mut inner) = cpu_samplers {
inner.stage.clear();
}
let mut dynamic_buffers = Vec::new();
for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) {
match layout.ty {
wgt::BindingType::Buffer {
has_dynamic_offset,
ty,
..
} => {
let data = &desc.buffers[entry.resource_index as usize];
let gpu_address = data.resolve_address();
let size = data.resolve_size() as u32;
let inner = cpu_views.as_mut().unwrap();
let cpu_index = inner.stage.len() as u32;
let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index);
match ty {
_ if has_dynamic_offset => {
dynamic_buffers.push(gpu_address);
}
wgt::BufferBindingType::Uniform => {
let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1;
let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC {
BufferLocation: gpu_address,
SizeInBytes: size,
};
self.raw.CreateConstantBufferView(&raw_desc, handle);
}
wgt::BufferBindingType::Storage { read_only: true } => {
let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER,
u: mem::zeroed(),
};
*raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV {
FirstElement: data.offset,
NumElements: size / 4,
StructureByteStride: 0,
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW,
};
self.raw.CreateShaderResourceView(
data.buffer.resource.as_mut_ptr(),
&raw_desc,
handle,
);
}
wgt::BufferBindingType::Storage { read_only: false } => {
let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER,
u: mem::zeroed(),
};
*raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV {
FirstElement: data.offset,
NumElements: size / 4,
StructureByteStride: 0,
CounterOffsetInBytes: 0,
Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW,
};
self.raw.CreateUnorderedAccessView(
data.buffer.resource.as_mut_ptr(),
ptr::null_mut(),
&raw_desc,
handle,
);
}
}
inner.stage.push(handle);
}
wgt::BindingType::Texture { .. }
| wgt::BindingType::StorageTexture {
access: wgt::StorageTextureAccess::ReadOnly,
..
} => {
let data = &desc.textures[entry.resource_index as usize];
let handle = data.view.handle_srv.unwrap();
cpu_views.as_mut().unwrap().stage.push(handle.raw);
}
wgt::BindingType::StorageTexture { .. } => {
let data = &desc.textures[entry.resource_index as usize];
let handle = data.view.handle_uav.unwrap();
cpu_views.as_mut().unwrap().stage.push(handle.raw);
}
wgt::BindingType::Sampler { .. } => {
let data = &desc.samplers[entry.resource_index as usize];
cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw);
}
}
}
let handle_views = match cpu_views {
Some(inner) => {
let dual = descriptor::upload(
self.raw,
&*inner,
&self.shared.heap_views,
&desc.layout.copy_counts,
)?;
Some(dual)
}
None => None,
};
let handle_samplers = match cpu_samplers {
Some(inner) => {
let dual = descriptor::upload(
self.raw,
&*inner,
&self.shared.heap_samplers,
&desc.layout.copy_counts,
)?;
Some(dual)
}
None => None,
};
Ok(super::BindGroup {
gpu_views: unimplemented!(),
gpu_samplers: unimplemented!(),
dynamic_buffers: Vec::new(),
handle_views,
handle_samplers,
dynamic_buffers,
})
}
unsafe fn destroy_bind_group(&self, group: super::BindGroup) {}
unsafe fn destroy_bind_group(&self, group: super::BindGroup) {
if let Some(dual) = group.handle_views {
let _ = self.shared.heap_views.free_slice(dual);
}
if let Some(dual) = group.handle_samplers {
let _ = self.shared.heap_samplers.free_slice(dual);
}
}
unsafe fn create_shader_module(
&self,

View File

@@ -16,7 +16,7 @@ mod instance;
use arrayvec::ArrayVec;
use parking_lot::Mutex;
use std::{borrow::Cow, fmt, mem, ptr, sync::Arc};
use std::{borrow::Cow, mem, ptr, sync::Arc};
use winapi::{
shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror},
um::{d3d12, synchapi, winbase, winnt},
@@ -147,6 +147,8 @@ pub struct Adapter {
device: native::Device,
library: Arc<native::D3D12Lib>,
private_caps: PrivateCapabilities,
//Note: this isn't used right now, but we'll need it later.
#[allow(unused)]
workarounds: Workarounds,
}
@@ -382,6 +384,9 @@ unsafe impl Sync for Fence {}
pub struct BindGroupLayout {
/// Sorted list of entries.
entries: Vec<wgt::BindGroupLayoutEntry>,
// Staging heap for CBV/SRV/UAV descriptors; `None` when the layout
// declares no buffer or texture views.
cpu_heap_views: Option<descriptor::CpuHeap>,
// Staging heap for sampler descriptors; `None` when there are none.
cpu_heap_samplers: Option<descriptor::CpuHeap>,
copy_counts: Vec<u32>, // all 1's
}
enum BufferViewKind {
@@ -390,22 +395,13 @@ enum BufferViewKind {
UnorderedAccess,
}
#[derive(Debug)]
pub struct BindGroup {
gpu_views: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
gpu_samplers: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
handle_views: Option<descriptor::DualHandle>,
handle_samplers: Option<descriptor::DualHandle>,
dynamic_buffers: Vec<native::GpuAddress>,
}
impl fmt::Debug for BindGroup {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("BindGroup")
.field("gpu_views", &self.gpu_views.ptr)
.field("gpu_samplers", &self.gpu_samplers.ptr)
.field("dynamic_buffers", &self.dynamic_buffers)
.finish()
}
}
bitflags::bitflags! {
struct TableTypes: u8 {
const SRV_CBV_UAV = 0x1;