From 8163dc7b471a2f4c1db74ca12514a58bacde1a44 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 13 Jul 2021 00:29:29 -0400 Subject: [PATCH] hal/dx12: pipeline creation --- README.md | 2 +- wgpu-hal/src/dx12/adapter.rs | 1 + wgpu-hal/src/dx12/command.rs | 34 ++++- wgpu-hal/src/dx12/conv.rs | 207 +++++++++++++++++++++++++++++- wgpu-hal/src/dx12/device.rs | 240 ++++++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/mod.rs | 6 +- wgpu-types/src/lib.rs | 2 +- 7 files changed, 465 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 8edc2907c7..0bb725b5c4 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ If you are looking for the native implementation or bindings to the API in other API | Windows 7/10 | Linux & Android | macOS & iOS | ----- | ------------------ | ------------------ | ------------------ | DX11 | :construction: | | | - DX12 | :construction: | | | + DX12 | :ok: | | | Vulkan | :white_check_mark: | :white_check_mark: | | Metal | | | :white_check_mark: | GLes3 | | :ok: | | diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 175d1524e6..15df11842d 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -108,6 +108,7 @@ impl super::Adapter { super::MemoryArchitecture::NonUnified }, shader_debug_info: instance_flags.contains(crate::InstanceFlags::DEBUG), + heap_create_not_zeroed: false, //TODO: winapi support for Options7 }; // Theoretically vram limited, but in practice 2^20 is the limit diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index e93989981e..1bcf7e38bf 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -637,7 +637,30 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list.unwrap().EndEvent() } - unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) {} + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.unwrap(); + + list.set_graphics_root_signature(pipeline.signature); + list.set_pipeline_state(pipeline.raw); + list.IASetPrimitiveTopology(pipeline.topology); + + //TODO: root signature changes require full layout rebind! + + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } unsafe fn set_index_buffer<'a>( &mut self, @@ -795,7 +818,14 @@ impl crate::CommandEncoder for super::CommandEncoder { self.end_pass(); } - unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) {} + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.unwrap(); + + list.set_compute_root_signature(pipeline.signature); + list.set_pipeline_state(pipeline.raw); + + //TODO: root signature changes require full layout rebind! + } unsafe fn dispatch(&mut self, count: [u32; 3]) { self.list.unwrap().dispatch(count); diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index b7dee5aa7a..7ea1c5885b 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,7 +1,7 @@ use std::iter; use winapi::{ shared::{dxgi1_2, dxgiformat}, - um::d3d12, + um::{d3d12, d3dcommon}, }; pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { @@ -117,6 +117,45 @@ pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { } } +pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::VertexFormat as Vf; + use winapi::shared::dxgiformat::*; + + match format { + Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, + Vf::Snorm8x2 => DXGI_FORMAT_R8G8_SNORM, + Vf::Uint8x2 => DXGI_FORMAT_R8G8_UINT, + Vf::Sint8x2 => DXGI_FORMAT_R8G8_SINT, + Vf::Unorm8x4 => DXGI_FORMAT_R8G8B8A8_UNORM, + Vf::Snorm8x4 => DXGI_FORMAT_R8G8B8A8_SNORM, + Vf::Uint8x4 => DXGI_FORMAT_R8G8B8A8_UINT, + Vf::Sint8x4 => DXGI_FORMAT_R8G8B8A8_SINT, + Vf::Unorm16x2 => DXGI_FORMAT_R16G16_UNORM, + Vf::Snorm16x2 => DXGI_FORMAT_R16G16_SNORM, + Vf::Uint16x2 => DXGI_FORMAT_R16G16_UINT, + Vf::Sint16x2 => DXGI_FORMAT_R16G16_SINT, + Vf::Float16x2 => DXGI_FORMAT_R16G16_FLOAT, + Vf::Unorm16x4 => DXGI_FORMAT_R16G16B16A16_UNORM, + Vf::Snorm16x4 => DXGI_FORMAT_R16G16B16A16_SNORM, + Vf::Uint16x4 => DXGI_FORMAT_R16G16B16A16_UINT, + Vf::Sint16x4 => DXGI_FORMAT_R16G16B16A16_SINT, + Vf::Float16x4 => DXGI_FORMAT_R16G16B16A16_FLOAT, + Vf::Uint32 => DXGI_FORMAT_R32_UINT, + Vf::Sint32 => DXGI_FORMAT_R32_SINT, + Vf::Float32 => DXGI_FORMAT_R32_FLOAT, + Vf::Uint32x2 => DXGI_FORMAT_R32G32_UINT, + Vf::Sint32x2 => DXGI_FORMAT_R32G32_SINT, + Vf::Float32x2 => DXGI_FORMAT_R32G32_FLOAT, + Vf::Uint32x3 => DXGI_FORMAT_R32G32B32_UINT, + Vf::Sint32x3 => DXGI_FORMAT_R32G32B32_SINT, + Vf::Float32x3 => DXGI_FORMAT_R32G32B32_FLOAT, + Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT, + Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT, + Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + pub fn map_acomposite_alpha_mode(mode: crate::CompositeAlphaMode) -> dxgi1_2::DXGI_ALPHA_MODE { use crate::CompositeAlphaMode as Cam; match mode { @@ -308,3 +347,169 @@ pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12::D3D12_RES } state } + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Point => { + log::error!("Point rasterization is not supported"); + d3d12::D3D12_FILL_MODE_WIREFRAME + } + wgt::PolygonMode::Line => d3d12::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => d3d12::D3D12_FILL_MODE_SOLID, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12::D3D12_BLEND_ZERO, + Bf::One => d3d12::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12::D3D12_BLEND_SRC_ALPHA_SAT, + //Bf::Src1Color if is_alpha => d3d12::D3D12_BLEND_SRC1_ALPHA, + //Bf::Src1Color => d3d12::D3D12_BLEND_SRC1_COLOR, + //Bf::OneMinusSrc1Color if is_alpha => d3d12::D3D12_BLEND_INV_SRC1_ALPHA, + //Bf::OneMinusSrc1Color => d3d12::D3D12_BLEND_INV_SRC1_COLOR, + //Bf::Src1Alpha => d3d12::D3D12_BLEND_SRC1_ALPHA, + //Bf::OneMinusSrc1Alpha => d3d12::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12::D3D12_BLEND_OP, + d3d12::D3D12_BLEND, + d3d12::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => d3d12::D3D12_BLEND_OP_MAX, + }; + let raw_src = map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: &[wgt::ColorTargetState], +) -> [d3d12::D3D12_RENDER_TARGET_BLEND_DESC; d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] +{ + let dummy_target = d3d12::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12::D3D12_BLEND_ZERO, + DestBlend: d3d12::D3D12_BLEND_ZERO, + BlendOp: d3d12::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12::D3D12_BLEND_OP_ADD, + LogicOp: d3d12::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12::D3D12_STENCIL_OP_ZERO, + So::Replace => d3d12::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => d3d12::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12::D3D12_DEPTH_STENCILOP_DESC { + d3d12::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12::D3D12_DEPTH_STENCIL_DESC { + d3d12::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: if ds.is_depth_enabled() { 1 } else { 0 }, + DepthWriteMask: if ds.depth_write_enabled { + d3d12::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: if ds.stencil.is_enabled() { 1 } else { 0 }, + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index cd636fdf8a..e1517fb441 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{ffi, mem, ptr, slice, sync::Arc}; +use std::{ffi, mem, num::NonZeroU32, ptr, slice, sync::Arc}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, d3dcompiler, synchapi, winbase}, @@ -8,6 +8,8 @@ use winapi::{ }; const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; +//TODO: find the exact value +const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12::D3D12_HEAP_FLAG_NONE; impl super::Device { pub(super) fn new( @@ -48,13 +50,16 @@ impl super::Device { let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, - CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE, - MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_L0, + CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. } => d3d12::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12::D3D12_MEMORY_POOL_L1, + }, CreationNodeMask: 0, VisibleNodeMask: 0, }; - let hr = raw.CreateCommittedResource( + raw.CreateCommittedResource( &heap_properties, d3d12::D3D12_HEAP_FLAG_NONE, &raw_desc, @@ -62,17 +67,11 @@ impl super::Device { ptr::null(), &d3d12::ID3D12Resource::uuidof(), zero_buffer.mut_void(), - ); + ) + .into_device_result("Zero buffer creation")?; - hr.into_device_result("Zero buffer creation")?; - - let range = d3d12::D3D12_RANGE { Begin: 0, End: 0 }; - let mut ptr = std::ptr::null_mut(); - (*zero_buffer) - .Map(0, &range, &mut ptr) - .into_device_result("Map zero buffer")?; - slice::from_raw_parts_mut(ptr as *mut u8, super::ZERO_BUFFER_SIZE as usize).fill(0); - (*zero_buffer).Unmap(0, &range); + //Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. }; // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 @@ -129,6 +128,10 @@ impl super::Device { }, private_caps, shared: Arc::new(shared), + //Note: these names have to match Naga's convention + vertex_attribute_names: (0..d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) + .map(|i| ffi::CString::new(format!("LOC{}", i)).unwrap()) + .collect(), rtv_pool: Mutex::new(descriptor::CpuPool::new( raw, native::DescriptorHeapType::Rtv, @@ -651,7 +654,11 @@ impl crate::Device for super::Device { let hr = self.raw.CreateCommittedResource( &heap_properties, - d3d12::D3D12_HEAP_FLAG_NONE, + if self.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12::D3D12_HEAP_FLAG_NONE + }, &raw_desc, d3d12::D3D12_RESOURCE_STATE_COMMON, ptr::null(), @@ -726,7 +733,11 @@ impl crate::Device for super::Device { let hr = self.raw.CreateCommittedResource( &heap_properties, - d3d12::D3D12_HEAP_FLAG_NONE, + if self.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12::D3D12_HEAP_FLAG_NONE + }, &raw_desc, d3d12::D3D12_RESOURCE_STATE_COMMON, ptr::null(), @@ -975,6 +986,16 @@ impl crate::Device for super::Device { // Root Descriptors 1 // ... + //TODO: reverse the order, according to this advice in + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + //> Furthermore, applications should generally sort the layout + //> of the root arguments in decreasing order of change frequency. + //> This way if some implementations need to switch to a different + //> memory storage scheme to version parts of a heavily populated + //> root arguments, the data that is changing at the highest frequency + //> (near the start of the root arguments) is most likely to run + //> as efficiently as possible. + let mut root_offset = 0u32; let root_constants: &[()] = &[]; @@ -1338,7 +1359,181 @@ impl crate::Device for super::Device { &self, desc: &crate::RenderPipelineDescriptor, ) -> Result { - unimplemented!() + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)? + } + None => native::Blob::null(), + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::InputStepMode::Vertex => { + (d3d12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::InputStepMode::Instance => { + (d3d12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + let name = &self.vertex_attribute_names[attribute.shader_location as usize]; + input_element_descs.push(d3d12::D3D12_INPUT_ELEMENT_DESC { + SemanticName: name.as_ptr(), + SemanticIndex: attribute.shader_location, + Format: conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + *rtv_format = conv::map_texture_format(ct.format); + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias.clone()) + .unwrap_or_default(); + + let raw_rasterizer = d3d12::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: if desc.primitive.clamp_depth { 0 } else { 1 }, + MultisampleEnable: if desc.multisample.count > 1 { 1 } else { 0 }, + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.raw.as_mut_ptr(), + VS: *native::Shader::from_blob(blob_vs), + PS: if blob_fs.is_null() { + *native::Shader::null() + } else { + *native::Shader::from_blob(blob_fs) + }, + GS: *native::Shader::null(), + DS: *native::Shader::null(), + HS: *native::Shader::null(), + StreamOutput: d3d12::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: if desc.multisample.alpha_to_coverage_enabled { + 1 + } else { + 0 + }, + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => mem::zeroed(), + }, + InputLayout: d3d12::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, + Some(wgt::IndexFormat::Uint32) => { + d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = native::PipelineState::null(); + let hr = self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ); + + blob_vs.destroy(); + if !blob_fs.is_null() { + blob_fs.destroy(); + } + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::RenderPipeline { + raw, + signature: desc.layout.raw, + topology, + vertex_strides, + }) } unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { pipeline.raw.destroy(); @@ -1348,17 +1543,17 @@ impl crate::Device for super::Device { &self, desc: &crate::ComputePipelineDescriptor, ) -> Result { - let cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; let pair = self.raw.create_compute_pipeline_state( desc.layout.raw, - native::Shader::from_blob(cs), + native::Shader::from_blob(blob_cs), 0, native::CachedPSO::null(), native::PipelineStateFlags::empty(), ); - cs.destroy(); + blob_cs.destroy(); let raw = pair.into_result().map_err(|err| { crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) @@ -1369,7 +1564,10 @@ impl crate::Device for super::Device { raw.SetName(cwstr.as_ptr()); } - Ok(super::ComputePipeline { raw }) + Ok(super::ComputePipeline { + raw, + signature: desc.layout.raw, + }) } unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { pipeline.raw.destroy(); diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 23af6212f1..4c2e43ab94 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -16,7 +16,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, mem, num::NonZeroU32, ptr, sync::Arc}; +use std::{borrow::Cow, ffi, mem, num::NonZeroU32, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -132,6 +132,7 @@ struct PrivateCapabilities { heterogeneous_resource_heaps: bool, memory_architecture: MemoryArchitecture, shader_debug_info: bool, + heap_create_not_zeroed: bool, } #[derive(Default)] @@ -197,6 +198,7 @@ pub struct Device { idler: Idler, private_caps: PrivateCapabilities, shared: Arc, + vertex_attribute_names: Vec, // CPU only pools rtv_pool: Mutex, dsv_pool: Mutex, @@ -433,6 +435,7 @@ pub struct ShaderModule { pub struct RenderPipeline { raw: native::PipelineState, + signature: native::RootSignature, topology: d3d12::D3D12_PRIMITIVE_TOPOLOGY, vertex_strides: [Option; crate::MAX_VERTEX_BUFFERS], } @@ -442,6 +445,7 @@ unsafe impl Sync for RenderPipeline {} pub struct ComputePipeline { raw: native::PipelineState, + signature: native::RootSignature, } unsafe impl Send for ComputePipeline {} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index bda8bc7e9b..bb92953203 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -110,7 +110,7 @@ bitflags::bitflags! { /// Vulkan + Metal + DX12 + Browser WebGPU const PRIMARY = Self::VULKAN.bits | Self::METAL.bits - | Self::DX12.bits + //| Self::DX12.bits // enable when Naga is polished | Self::BROWSER_WEBGPU.bits; /// All the apis that wgpu offers second tier of support for. These may /// be unsupported/still experimental.