diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a315458180..1b9bd3ac34 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -2418,6 +2418,7 @@ impl Device { /// Wait for idle and remove resources that we can, before we die. pub(crate) fn prepare_to_die(&mut self) { + self.pending_writes.deactivate(); let mut life_tracker = self.life_tracker.lock(); let current_index = self.active_submission_index; if let Err(error) = unsafe { self.raw.wait(&self.fence, current_index, CLEANUP_WAIT_MS) } { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index e302bd8c6a..577a6fd7e7 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -166,6 +166,15 @@ impl PendingWrites { } &mut self.command_encoder } + + pub fn deactivate(&mut self) { + if self.is_active { + unsafe { + self.command_encoder.discard_encoding(); + } + self.is_active = false; + } + } } #[derive(Default)] diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index cb367e0332..5d632211da 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -636,6 +636,14 @@ impl Hub { } } + // destroy command buffers first, since otherwise DX12 isn't happy + for element in self.command_buffers.data.write().map.drain(..) { + if let Element::Occupied(command_buffer, _) = element { + let device = &devices[command_buffer.device_id.value]; + device.destroy_command_buffer(command_buffer); + } + } + for element in self.samplers.data.write().map.drain(..) { if let Element::Occupied(sampler, _) = element { unsafe { @@ -673,12 +681,6 @@ impl Hub { devices[buffer.device_id.value].destroy_buffer(buffer); } } - for element in self.command_buffers.data.write().map.drain(..) { - if let Element::Occupied(command_buffer, _) = element { - let device = &devices[command_buffer.device_id.value]; - device.destroy_command_buffer(command_buffer); - } - } for element in self.bind_groups.data.write().map.drain(..) { if let Element::Occupied(bind_group, _) = element { let device = &devices[bind_group.device_id.value]; diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 16d3f90604..040e6083f5 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -264,10 +264,7 @@ impl crate::Adapter for super::Adapter { ) -> crate::TextureFormatCapabilities { use crate::TextureFormatCapabilities as Tfc; - let info = format.describe(); - let is_compressed = info.block_dimensions != (1, 1); let raw_format = conv::map_texture_format(format); - let mut data = d3d12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { Format: raw_format, Support1: mem::zeroed(), diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 1bcf7e38bf..9f3ea39e98 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -36,7 +36,7 @@ impl super::CommandEncoder { unsafe fn end_pass(&mut self) { let list = self.list.unwrap(); - list.set_descriptor_heaps(&[native::DescriptorHeap::null(); 2]); + list.set_descriptor_heaps(&[]); if self.pass.has_label { list.EndEvent(); } @@ -344,7 +344,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_buffer( &mut self, src: &super::Texture, - src_usage: crate::TextureUses, + _src_usage: crate::TextureUses, dst: &super::Buffer, regions: T, ) where @@ -364,7 +364,7 @@ impl crate::CommandEncoder for super::CommandEncoder { let raw_format = conv::map_texture_format(src.format); for r in regions { - let dst_box = make_box(&r.texture_base.origin, &r.size); + let src_box = make_box(&r.texture_base.origin, &r.size); *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.texture_base); *dst_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { Offset: r.buffer_layout.offset, @@ -380,7 +380,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }, }; - list.CopyTextureRegion(&src_location, 0, 0, 0, &dst_location, &dst_box); + list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); } } @@ -410,7 +410,7 @@ impl crate::CommandEncoder for super::CommandEncoder { range: Range, buffer: &super::Buffer, offset: wgt::BufferAddress, - stride: wgt::BufferSize, + _stride: wgt::BufferSize, ) { self.list.unwrap().ResolveQueryData( set.raw.as_mut_ptr(), @@ -508,7 +508,7 @@ impl crate::CommandEncoder for super::CommandEncoder { pResource: resolve.src.0.as_mut_ptr(), Subresource: resolve.src.1, StateBefore: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, - StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, }; self.temp.barriers.push(barrier); *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 7ea1c5885b..cfe11a638d 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -100,9 +100,9 @@ pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI } } +//Note: DXGI doesn't allow sRGB format on the swapchain, +// but creating RTV of swapchain buffers with sRGB works. pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { - // NOTE: DXGI doesn't allow sRGB format on the swapchain, but - // creating RTV of swapchain buffers with sRGB works match format { wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, @@ -110,6 +110,24 @@ pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI } } +//Note: SRV and UAV can't use the depth formats directly +//TODO: stencil views? +pub fn map_texture_format_nodepth(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Depth32Float => dxgiformat::DXGI_FORMAT_R32_FLOAT, + wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8 => { + dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS + } + _ => { + assert_eq!( + crate::FormatAspects::from(format), + crate::FormatAspects::COLOR + ); + map_texture_format(format) + } + } +} + pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { match format { wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, @@ -170,9 +188,6 @@ pub fn map_buffer_usage_to_resource_flags(usage: crate::BufferUses) -> d3d12::D3 if usage.contains(crate::BufferUses::STORAGE_STORE) { flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } - if !usage.intersects(crate::BufferUses::UNIFORM | crate::BufferUses::STORAGE_LOAD) { - flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - } flags } @@ -196,13 +211,13 @@ pub fn map_texture_usage_to_resource_flags( crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, ) { flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) { + flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } } if usage.contains(crate::TextureUses::STORAGE_STORE) { flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } - if !usage.intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) { - flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - } flags } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 8082a056f3..0f5f5a3570 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -7,6 +7,8 @@ use winapi::{ Interface, }; +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; //TODO: find the exact value const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12::D3D12_HEAP_FLAG_NONE; @@ -45,7 +47,7 @@ impl super::Device { Quality: 0, }, Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - Flags: d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE, + Flags: d3d12::D3D12_RESOURCE_FLAG_NONE, }; let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { @@ -110,7 +112,7 @@ impl super::Device { heap_views: descriptor::GeneralHeap::new( raw, native::DescriptorHeapType::CbvSrvUav, - capacity_samplers, + capacity_views, )?, heap_samplers: descriptor::GeneralHeap::new( raw, @@ -128,10 +130,6 @@ impl super::Device { }, private_caps, shared: Arc::new(shared), - //Note: these names have to match Naga's convention - vertex_attribute_names: (0..d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) - .map(|i| ffi::CString::new(format!("LOC{}", i)).unwrap()) - .collect(), rtv_pool: Mutex::new(descriptor::CpuPool::new( raw, native::DescriptorHeapType::Rtv, @@ -153,7 +151,12 @@ impl super::Device { } pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { - let value = self.idler.fence.get_value() + 1; + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(crate::DeviceError::Lost); + } + + let value = cur_value + 1; log::info!("Waiting for idle with value {}", value); self.present_queue.signal(self.idler.fence, value); let hr = self @@ -171,7 +174,7 @@ impl super::Device { desc: &crate::TextureViewDescriptor, ) -> descriptor::Handle { let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { - Format: conv::map_texture_format(desc.format), + Format: conv::map_texture_format_nodepth(desc.format), ViewDimension: 0, Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, u: mem::zeroed(), @@ -280,7 +283,7 @@ impl super::Device { desc: &crate::TextureViewDescriptor, ) -> descriptor::Handle { let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { - Format: conv::map_texture_format(desc.format), + Format: conv::map_texture_format_nodepth(desc.format), ViewDimension: 0, u: mem::zeroed(), }; @@ -524,7 +527,7 @@ impl super::Device { //TODO: reuse the writer let mut source = String::new(); let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); - let reflection_info = writer + let _reflection_info = writer .write(module, &stage.module.naga.info) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {:?}", e)))?; @@ -593,6 +596,7 @@ impl crate::Device for super::Device { self.srv_uav_pool.into_inner().destroy(); self.sampler_pool.into_inner().destroy(); self.shared.destroy(); + self.idler.destroy(); // Debug tracking alive objects if !thread::panicking() { @@ -616,11 +620,16 @@ impl crate::Device for super::Device { desc: &crate::BufferDescriptor, ) -> Result { let mut resource = native::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } let raw_desc = d3d12::D3D12_RESOURCE_DESC { Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, Alignment: 0, - Width: desc.size, + Width: size, Height: 1, DepthOrArraySize: 1, MipLevels: 1, @@ -670,10 +679,12 @@ impl crate::Device for super::Device { ); hr.into_device_result("Buffer creation")?; - Ok(super::Buffer { - resource, - size: desc.size, - }) + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + resource.SetName(cwstr.as_ptr()); + } + + Ok(super::Buffer { resource, size }) } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { buffer.resource.destroy(); @@ -697,8 +708,8 @@ impl crate::Device for super::Device { (*buffer.resource).Unmap(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }); Ok(()) } - unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, ranges: I) {} - unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, ranges: I) {} + unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} unsafe fn create_texture( &self, @@ -743,17 +754,17 @@ impl crate::Device for super::Device { }, &raw_desc, d3d12::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), + ptr::null(), // clear value &d3d12::ID3D12Resource::uuidof(), resource.mut_void(), ); + hr.into_device_result("Texture creation")?; if let Some(label) = desc.label { let cwstr = conv::map_label(label); resource.SetName(cwstr.as_ptr()); } - hr.into_device_result("Texture creation")?; Ok(super::Texture { resource, format: desc.format, @@ -886,6 +897,12 @@ impl crate::Device for super::Device { .raw .create_command_allocator(native::CmdListType::Direct) .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + allocator.SetName(cwstr.as_ptr()); + } + Ok(super::CommandEncoder { allocator, device: self.raw, @@ -999,7 +1016,6 @@ impl crate::Device for super::Device { //> (near the start of the root arguments) is most likely to run //> as efficiently as possible. - let mut root_offset = 0u32; let root_constants: &[()] = &[]; // Number of elements in the root signature. @@ -1077,7 +1093,6 @@ impl crate::Device for super::Device { &ranges[range_base..], )); info.tables |= super::TableTypes::SRV_CBV_UAV; - root_offset += 1; } // Sampler descriptor tables @@ -1103,7 +1118,6 @@ impl crate::Device for super::Device { &ranges[range_base..], )); info.tables |= super::TableTypes::SAMPLERS; - root_offset += 1; } // Root (dynamic) descriptor tables @@ -1137,7 +1151,6 @@ impl crate::Device for super::Device { }; info.dynamic_buffers.push(kind); parameters.push(param); - root_offset += 2; // root view costs 2 words } bind_group_infos.push(info); @@ -1217,10 +1230,13 @@ impl crate::Device for super::Device { for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { match layout.ty { wgt::BindingType::Buffer { - has_dynamic_offset, - ty, + has_dynamic_offset: true, .. } => { + let data = &desc.buffers[entry.resource_index as usize]; + dynamic_buffers.push(data.resolve_address()); + } + wgt::BindingType::Buffer { ty, .. } => { let data = &desc.buffers[entry.resource_index as usize]; let gpu_address = data.resolve_address(); let size = data.resolve_size() as u32; @@ -1228,14 +1244,12 @@ impl crate::Device for super::Device { let cpu_index = inner.stage.len() as u32; let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); match ty { - _ if has_dynamic_offset => { - dynamic_buffers.push(gpu_address); - } wgt::BufferBindingType::Uniform => { - let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let size_mask = + d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC { BufferLocation: gpu_address, - SizeInBytes: size, + SizeInBytes: ((size - 1) | size_mask) + 1, }; self.raw.CreateConstantBufferView(&raw_desc, handle); } @@ -1344,7 +1358,7 @@ impl crate::Device for super::Device { unsafe fn create_shader_module( &self, - desc: &crate::ShaderModuleDescriptor, + _desc: &crate::ShaderModuleDescriptor, shader: crate::ShaderInput, ) -> Result { match shader { @@ -1392,9 +1406,8 @@ impl crate::Device for super::Device { } }; for attribute in vbuf.attributes { - let name = &self.vertex_attribute_names[attribute.shader_location as usize]; input_element_descs.push(d3d12::D3D12_INPUT_ELEMENT_DESC { - SemanticName: name.as_ptr(), + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, SemanticIndex: attribute.shader_location, Format: conv::map_vertex_format(attribute.format), InputSlot: i as u32, diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 7647ce448c..1e40a45a86 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -41,7 +41,7 @@ unsafe extern "system" fn output_debug_string_handler( Some(msg) => { match MESSAGE_PREFIXES .iter() - .find(|&&(prefix, level)| msg.starts_with(prefix)) + .find(|&&(prefix, _)| msg.starts_with(prefix)) { Some(&(prefix, level)) => (&msg[prefix.len() + 2..], level), None => (msg, log::Level::Debug), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 672acba969..8dea5cc9e4 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -5,8 +5,6 @@ !*/ -#![allow(unused_variables)] - mod adapter; mod command; mod conv; @@ -16,7 +14,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, ffi, mem, num::NonZeroU32, ptr, sync::Arc}; +use std::{borrow::Cow, mem, num::NonZeroU32, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -109,8 +107,10 @@ struct SwapChain { // when the swapchain is destroyed resources: Vec, waitable: winnt::HANDLE, - present_mode: wgt::PresentMode, acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: wgt::Extent3d, } pub struct Surface { @@ -162,6 +162,12 @@ struct Idler { event: native::Event, } +impl Idler { + unsafe fn destroy(self) { + self.fence.destroy(); + } +} + struct CommandSignatures { draw: native::CommandSignature, draw_indexed: native::CommandSignature, @@ -199,7 +205,6 @@ pub struct Device { idler: Idler, private_caps: PrivateCapabilities, shared: Arc, - vertex_attribute_names: Vec, // CPU only pools rtv_pool: Mutex, dsv_pool: Mutex, @@ -220,6 +225,14 @@ pub struct Queue { unsafe impl Send for Queue {} unsafe impl Sync for Queue {} +impl Drop for Queue { + fn drop(&mut self) { + unsafe { + self.raw.destroy(); + } + } +} + #[derive(Default)] struct Temp { marker: Vec, @@ -579,6 +592,8 @@ impl crate::Surface for Surface { waitable, acquired_count: 0, present_mode: config.present_mode, + format: config.format, + size: config.extent, }); Ok(()) @@ -599,9 +614,31 @@ impl crate::Surface for Surface { &mut self, timeout_ms: u32, ) -> Result>, crate::SurfaceError> { - Ok(None) + let sc = self.swap_chain.as_mut().unwrap(); + + sc.wait(timeout_ms)?; + + let base_index = sc.raw.GetCurrentBackBufferIndex() as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index], + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: Texture) { + let sc = self.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; } - unsafe fn discard_texture(&mut self, texture: Texture) {} } impl crate::Queue for Queue { @@ -627,7 +664,7 @@ impl crate::Queue for Queue { unsafe fn present( &mut self, surface: &mut Surface, - texture: Texture, + _texture: Texture, ) -> Result<(), crate::SurfaceError> { let sc = surface.swap_chain.as_mut().unwrap(); sc.acquired_count -= 1;