diff --git a/Cargo.lock b/Cargo.lock index 9f32861144..c1a33fb97b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -444,6 +444,17 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "d3d12" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "091ed1b25fe47c7ff129fc440c23650b6114f36aa00bc7212cc8041879294428" +dependencies = [ + "bitflags", + "libloading 0.7.0", + "winapi 0.3.9", +] + [[package]] name = "darling" version = "0.10.2" @@ -1040,7 +1051,7 @@ dependencies = [ [[package]] name = "naga" version = "0.5.0" -source = "git+https://github.com/gfx-rs/naga?rev=0b9af95793e319817e74a30601cbcd4bad9bb3e6#0b9af95793e319817e74a30601cbcd4bad9bb3e6" +source = "git+https://github.com/gfx-rs/naga?rev=458db0b#458db0b5228854dc417283f4b9742e03f25bc492" dependencies = [ "bit-set", "bitflags", @@ -1458,6 +1469,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "range-alloc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e935c45e09cc6dcf00d2f0b2d630a58f4095320223d47fc68918722f0538b6" + [[package]] name = "raw-window-handle" version = "0.3.3" @@ -1981,9 +1998,11 @@ version = "0.9.0" dependencies = [ "arrayvec", "ash", + "bit-set", "bitflags", "block", "core-graphics-types", + "d3d12", "env_logger", "foreign-types", "fxhash", @@ -1998,6 +2017,7 @@ dependencies = [ "naga", "objc", "parking_lot", + "range-alloc", "raw-window-handle", "renderdoc-sys", "thiserror", diff --git a/README.md b/README.md index 8edc2907c7..0bb725b5c4 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ If you are looking for the native implementation or bindings to the API in other API | Windows 7/10 | Linux & Android | macOS & iOS | ----- | ------------------ | ------------------ | ------------------ | DX11 | :construction: | | | - DX12 | :construction: | | | + DX12 | :ok: | | | Vulkan | :white_check_mark: | :white_check_mark: | | Metal | | | :white_check_mark: | GLes3 | | :ok: | | diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 59519a2194..93583303d5 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -36,7 +36,7 @@ thiserror = "1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" features = ["wgsl-in"] [dependencies.wgt] @@ -57,7 +57,7 @@ hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["metal"] } hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["vulkan", "gles"] } [target.'cfg(all(not(target_arch = "wasm32"), windows))'.dependencies] -hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["vulkan"] } +hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["vulkan", "dx12"] } [build-dependencies] cfg_aliases = "0.1" diff --git a/wgpu-core/build.rs b/wgpu-core/build.rs index f9cba2a418..38dad2eac7 100644 --- a/wgpu-core/build.rs +++ b/wgpu-core/build.rs @@ -9,7 +9,7 @@ fn main() { // Backends vulkan: { all(not(wasm), any(windows, unix_wo_apple)) }, metal: { all(not(wasm), apple) }, - dx12: { all(false, not(wasm), windows) }, + dx12: { all(not(wasm), windows) }, dx11: { all(false, not(wasm), windows) }, gl: { all(not(wasm), unix_wo_apple) }, } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 883ae6ceee..7cafb9956f 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -15,6 +15,7 @@ use crate::{ memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction}, pipeline::PipelineFlags, resource::{Texture, 
TextureView, TextureViewSource}, + swap_chain::SwapChain, track::{StatefulTrackerSubset, TextureSelector, UsageConflict}, validation::{ check_buffer_usage, check_texture_usage, MissingBufferUsageError, MissingTextureUsageError, @@ -491,11 +492,11 @@ where struct RenderAttachment<'a> { texture_id: &'a Stored, selector: &'a TextureSelector, - previous_use: Option, - new_use: hal::TextureUses, + usage: hal::TextureUses, } -type AttachmentDataVec = ArrayVec; +const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_TARGETS + hal::MAX_COLOR_TARGETS + 1; +type AttachmentDataVec = ArrayVec; struct RenderPassInfo<'a, A: hal::Api> { context: RenderPassContext, @@ -514,6 +515,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, cmd_buf: &mut CommandBuffer, view_guard: &'a Storage, id::TextureViewId>, + swap_chain_guard: &'a Storage, id::SwapChainId>, ) -> Result { profiling::scope!("start", "RenderPassInfo"); @@ -527,7 +529,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { let mut attachment_type_name = ""; let mut extent = None; let mut sample_count = 0; - let mut used_swap_chain = None::>; + let mut used_swap_chain = None::<(Stored, hal::TextureUses)>; let mut add_view = |view: &TextureView, type_name| { if let Some(ex) = extent { @@ -577,12 +579,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { } }; - // Using render pass for transition. - let previous_use = cmd_buf - .trackers - .textures - .query(source_id.value, view.selector.clone()); - let new_use = if at.is_read_only(ds_aspects)? { + let usage = if at.is_read_only(ds_aspects)? { is_ds_read_only = true; hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::SAMPLED } else { @@ -591,16 +588,13 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { render_attachments.push(RenderAttachment { texture_id: source_id, selector: &view.selector, - previous_use, - new_use, + usage, }); - let old_use = previous_use.unwrap_or(new_use); depth_stencil = Some(hal::DepthStencilAttachment { target: hal::Attachment { view: &view.raw, - usage: new_use, - boundary_usage: old_use..new_use, + usage, }, depth_ops: at.depth.hal_ops(), stencil_ops: at.stencil.hal_ops(), @@ -626,33 +620,22 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { )); } - let boundary_usage = match color_view.source { + match color_view.source { TextureViewSource::Native(ref source_id) => { - let previous_use = cmd_buf - .trackers - .textures - .query(source_id.value, color_view.selector.clone()); - let new_use = hal::TextureUses::COLOR_TARGET; render_attachments.push(RenderAttachment { texture_id: source_id, selector: &color_view.selector, - previous_use, - new_use, + usage: hal::TextureUses::COLOR_TARGET, }); - - let old_use = previous_use.unwrap_or(new_use); - old_use..new_use } TextureViewSource::SwapChain(ref source_id) => { - assert!(used_swap_chain.is_none()); - used_swap_chain = Some(source_id.clone()); - - let end = hal::TextureUses::empty(); - let start = match at.channel.load_op { + //HACK: guess the start usage based on the load op + let start_usage = match at.channel.load_op { + LoadOp::Load => hal::TextureUses::empty(), LoadOp::Clear => hal::TextureUses::UNINITIALIZED, - LoadOp::Load => end, }; - start..end + assert!(used_swap_chain.is_none()); + used_swap_chain = Some((source_id.clone(), start_usage)); } }; @@ -676,34 +659,25 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { return Err(RenderPassErrorInner::InvalidResolveTargetSampleCount); } - let boundary_usage = match resolve_view.source { + match resolve_view.source { 
TextureViewSource::Native(ref source_id) => { - let previous_use = cmd_buf - .trackers - .textures - .query(source_id.value, resolve_view.selector.clone()); - let new_use = hal::TextureUses::COLOR_TARGET; render_attachments.push(RenderAttachment { texture_id: source_id, selector: &resolve_view.selector, - previous_use, - new_use, + usage: hal::TextureUses::COLOR_TARGET, }); - - let old_use = previous_use.unwrap_or(new_use); - old_use..new_use } TextureViewSource::SwapChain(ref source_id) => { + //HACK: guess the start usage + let start_usage = hal::TextureUses::UNINITIALIZED; assert!(used_swap_chain.is_none()); - used_swap_chain = Some(source_id.clone()); - hal::TextureUses::UNINITIALIZED..hal::TextureUses::empty() + used_swap_chain = Some((source_id.clone(), start_usage)); } }; hal_resolve_target = Some(hal::Attachment { view: &resolve_view.raw, usage: hal::TextureUses::COLOR_TARGET, - boundary_usage, }); } @@ -711,7 +685,6 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { target: hal::Attachment { view: &color_view.raw, usage: hal::TextureUses::COLOR_TARGET, - boundary_usage, }, resolve_target: hal_resolve_target, ops: at.channel.hal_ops(), @@ -723,6 +696,21 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { return Err(RenderPassErrorInner::InvalidSampleCount(sample_count)); } + if let Some((ref sc_id, start_usage)) = used_swap_chain { + let &(_, ref suf_texture) = swap_chain_guard[sc_id.value] + .acquired_texture + .as_ref() + .unwrap(); + let barrier = hal::TextureBarrier { + texture: std::borrow::Borrow::borrow(suf_texture), + usage: start_usage..hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }; + unsafe { + cmd_buf.encoder.raw.transition_textures(iter::once(barrier)); + } + } + let view_data = AttachmentData { colors: color_attachments .iter() @@ -756,7 +744,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { context, trackers: StatefulTrackerSubset::new(A::VARIANT), render_attachments, - used_swap_chain, + used_swap_chain: used_swap_chain.map(|(sc_id, _)| sc_id), is_ds_read_only, extent, _phantom: PhantomData, @@ -767,9 +755,28 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { mut self, raw: &mut A::CommandEncoder, texture_guard: &Storage, id::TextureId>, + swap_chain_guard: &Storage, id::SwapChainId>, ) -> Result<(StatefulTrackerSubset, Option>), RenderPassErrorInner> { profiling::scope!("finish", "RenderPassInfo"); + unsafe { + raw.end_render_pass(); + } + + if let Some(ref sc_id) = self.used_swap_chain { + let &(_, ref suf_texture) = swap_chain_guard[sc_id.value] + .acquired_texture + .as_ref() + .unwrap(); + let barrier = hal::TextureBarrier { + texture: std::borrow::Borrow::borrow(suf_texture), + usage: hal::TextureUses::COLOR_TARGET..hal::TextureUses::empty(), + range: wgt::ImageSubresourceRange::default(), + }; + unsafe { + raw.transition_textures(iter::once(barrier)); + } + } for ra in self.render_attachments { let texture = &texture_guard[ra.texture_id.value]; @@ -782,29 +789,11 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { ra.texture_id.value, &ra.texture_id.ref_count, ra.selector.clone(), - ra.new_use, + ra.usage, ) .map_err(UsageConflict::from)?; - - if let Some(usage) = ra.previous_use { - // Make the attachment tracks to be aware of the internal - // transition done by the render pass, by registering the - // previous usage as the initial state. 
- self.trackers - .textures - .prepend( - ra.texture_id.value, - &ra.texture_id.ref_count, - ra.selector.clone(), - usage, - ) - .unwrap(); - } } - unsafe { - raw.end_render_pass(); - } Ok((self.trackers, self.used_swap_chain)) } } @@ -842,7 +831,7 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); let (pass_raw, trackers, query_reset_state) = { - // read-only lock guard + let (swap_chain_guard, mut token) = hub.swap_chains.read(&mut token); let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); let cmd_buf = @@ -886,6 +875,7 @@ impl Global { depth_stencil_attachment, cmd_buf, &*view_guard, + &*swap_chain_guard, ) .map_pass_err(scope)?; @@ -1206,8 +1196,14 @@ impl Global { } RenderCommand::SetBlendConstant(ref color) => { state.blend_constant = OptionalState::Set; + let array = [ + color.r as f32, + color.g as f32, + color.b as f32, + color.a as f32, + ]; unsafe { - raw.set_blend_constants(color); + raw.set_blend_constants(&array); } } RenderCommand::SetStencilReference(value) => { @@ -1738,8 +1734,10 @@ impl Global { } log::trace!("Merging {:?} with the render pass", encoder_id); - let (trackers, used_swapchain) = - info.finish(raw, &*texture_guard).map_pass_err(scope)?; + let (trackers, used_swapchain) = info + .finish(raw, &*texture_guard, &*swap_chain_guard) + .map_pass_err(scope)?; + let raw_cmd_buf = unsafe { raw.end_encoding() .map_err(|_| RenderPassErrorInner::OutOfMemory) diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index ef974c15bf..5b679c40e0 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -120,27 +120,34 @@ pub(crate) fn extract_texture_selector( }); } - let layers = match texture.desc.dimension { - wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { - copy_texture.origin.z..copy_texture.origin.z + copy_size.depth_or_array_layers - } - wgt::TextureDimension::D3 => 0..1, + let (layers, origin_z) = match texture.desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => ( + copy_texture.origin.z..copy_texture.origin.z + copy_size.depth_or_array_layers, + 0, + ), + wgt::TextureDimension::D3 => (0..1, copy_texture.origin.z), + }; + let base = hal::TextureCopyBase { + origin: wgt::Origin3d { + x: copy_texture.origin.x, + y: copy_texture.origin.y, + z: origin_z, + }, + // this value will be incremented per copied layer + array_layer: layers.start, + mip_level: copy_texture.mip_level, + aspect: copy_aspect, }; let selector = TextureSelector { levels: copy_texture.mip_level..copy_texture.mip_level + 1, layers, }; - let base = hal::TextureCopyBase { - origin: copy_texture.origin, - mip_level: copy_texture.mip_level, - aspect: copy_aspect, - }; Ok((selector, base, format)) } /// Function copied with some modifications from webgpu standard -/// If successful, returns number of buffer bytes required for this copy. +/// If successful, returns (number of buffer bytes required for this copy, number of bytes between array layers). 
pub(crate) fn validate_linear_texture_data( layout: &wgt::ImageDataLayout, format: wgt::TextureFormat, @@ -149,7 +156,7 @@ pub(crate) fn validate_linear_texture_data( bytes_per_block: BufferAddress, copy_size: &Extent3d, need_copy_aligned_rows: bool, -) -> Result { +) -> Result<(BufferAddress, BufferAddress), TransferError> { // Convert all inputs to BufferAddress (u64) to prevent overflow issues let copy_width = copy_size.width as BufferAddress; let copy_height = copy_size.height as BufferAddress; @@ -202,10 +209,10 @@ pub(crate) fn validate_linear_texture_data( } let bytes_in_last_row = block_size * width_in_blocks; + let bytes_per_image = bytes_per_row * block_rows_per_image; let required_bytes_in_copy = if copy_width == 0 || copy_height == 0 || copy_depth == 0 { 0 } else { - let bytes_per_image = bytes_per_row * block_rows_per_image; let bytes_in_last_slice = bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; bytes_per_image * (copy_depth - 1) + bytes_in_last_slice }; @@ -227,17 +234,17 @@ pub(crate) fn validate_linear_texture_data( if copy_height > 1 && bytes_per_row < bytes_in_last_row { return Err(TransferError::InvalidBytesPerRow); } - Ok(required_bytes_in_copy) + Ok((required_bytes_in_copy, bytes_per_image)) } /// Function copied with minor modifications from webgpu standard -/// Returns the (virtual) mip level extent. +/// Returns the HAL copy extent and the layer count. pub(crate) fn validate_texture_copy_range( texture_copy_view: &ImageCopyTexture, desc: &wgt::TextureDescriptor<()>, texture_side: CopySide, copy_size: &Extent3d, -) -> Result { +) -> Result<(hal::CopyExtent, u32), TransferError> { let (block_width, block_height) = desc.format.describe().block_dimensions; let block_width = block_width as u32; let block_height = block_height as u32; @@ -295,7 +302,28 @@ pub(crate) fn validate_texture_copy_range( return Err(TransferError::UnalignedCopyHeight); } - Ok(extent_virtual) + let (depth, array_layer_count) = match desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + (1, copy_size.depth_or_array_layers) + } + wgt::TextureDimension::D3 => ( + copy_size + .depth_or_array_layers + .min(extent_virtual.depth_or_array_layers), + 1, + ), + }; + + // WebGPU uses the physical size of the texture for copies whereas vulkan uses + // the virtual size. We have passed validation, so it's safe to use the + // image extent data directly. We want the provided copy size to be no larger than + // the virtual size. + let copy_extent = hal::CopyExtent { + width: copy_size.width.min(extent_virtual.width), + height: copy_size.width.min(extent_virtual.height), + depth, + }; + Ok((copy_extent, array_layer_count)) } impl Global { @@ -505,13 +533,13 @@ impl Global { let dst_barriers = dst_pending.map(|pending| pending.into_hal(dst_texture)); let format_desc = dst_texture.desc.format.describe(); - let max_image_extent = validate_texture_copy_range( + let (hal_copy_size, array_layer_count) = validate_texture_copy_range( destination, &dst_texture.desc, CopySide::Destination, copy_size, )?; - let required_buffer_bytes_in_copy = validate_linear_texture_data( + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( &source.layout, dst_texture.desc.format, src_buffer.size, @@ -538,24 +566,22 @@ impl Global { ); } - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. 
We want the provided copy size to be no larger than - // the virtual size. - let region = hal::BufferTextureCopy { - buffer_layout: source.layout, - texture_base: dst_base, - size: Extent3d { - width: copy_size.width.min(max_image_extent.width), - height: copy_size.height.min(max_image_extent.height), - depth_or_array_layers: copy_size.depth_or_array_layers, - }, - }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = source.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_buffers(src_barriers); cmd_buf_raw.transition_textures(dst_barriers); - cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, iter::once(region)); + cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); } Ok(()) } @@ -635,9 +661,9 @@ impl Global { let dst_barriers = dst_pending.map(|pending| pending.into_hal(dst_buffer)); let format_desc = src_texture.desc.format.describe(); - let max_image_extent = + let (hal_copy_size, array_layer_count) = validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; - let required_buffer_bytes_in_copy = validate_linear_texture_data( + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( &destination.layout, src_texture.desc.format, dst_buffer.size, @@ -667,19 +693,17 @@ impl Global { }), ); - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. We want the provided copy size to be no larger than - // the virtual size. 
- let region = hal::BufferTextureCopy { - buffer_layout: destination.layout, - texture_base: src_base, - size: Extent3d { - width: copy_size.width.min(max_image_extent.width), - height: copy_size.height.min(max_image_extent.height), - depth_or_array_layers: copy_size.depth_or_array_layers, - }, - }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = src_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = destination.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_buffers(dst_barriers); @@ -688,7 +712,7 @@ impl Global { src_raw, hal::TextureUses::COPY_SRC, dst_raw, - iter::once(region), + regions, ); } Ok(()) @@ -725,11 +749,11 @@ impl Global { return Ok(()); } - let (src_range, src_base, _) = + let (src_range, src_tex_base, _) = extract_texture_selector(source, copy_size, &*texture_guard)?; - let (dst_range, dst_base, _) = + let (dst_range, dst_tex_base, _) = extract_texture_selector(destination, copy_size, &*texture_guard)?; - if src_base.aspect != dst_base.aspect { + if src_tex_base.aspect != dst_tex_base.aspect { return Err(TransferError::MismatchedAspects.into()); } @@ -777,32 +801,31 @@ impl Global { } barriers.extend(dst_pending.map(|pending| pending.into_hal(dst_texture))); - let max_src_image_extent = + let (src_copy_size, array_layer_count) = validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; - let max_dst_image_extent = validate_texture_copy_range( + let (dst_copy_size, _) = validate_texture_copy_range( destination, &dst_texture.desc, CopySide::Destination, copy_size, )?; - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. We want the provided copy size to be no larger than - // the virtual size. 
- let region = hal::TextureCopy { - src_base, - dst_base, - size: Extent3d { - width: copy_size - .width - .min(max_src_image_extent.width.min(max_dst_image_extent.width)), - height: copy_size - .height - .min(max_src_image_extent.height.min(max_dst_image_extent.height)), - depth_or_array_layers: copy_size.depth_or_array_layers, - }, + let hal_copy_size = hal::CopyExtent { + width: src_copy_size.width.min(dst_copy_size.width), + height: src_copy_size.height.min(dst_copy_size.height), + depth: src_copy_size.depth.min(dst_copy_size.depth), }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut src_base = src_tex_base.clone(); + let mut dst_base = dst_tex_base.clone(); + src_base.array_layer += rel_array_layer; + dst_base.array_layer += rel_array_layer; + hal::TextureCopy { + src_base, + dst_base, + size: hal_copy_size, + } + }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_textures(barriers.into_iter()); @@ -810,7 +833,7 @@ impl Global { src_raw, hal::TextureUses::COPY_SRC, dst_raw, - iter::once(region), + regions, ); } Ok(()) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 59de610407..1b9bd3ac34 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -728,11 +728,38 @@ impl Device { }); } + // filter the usages based on the other criteria + let usage = { + let mask_copy = !(hal::TextureUses::COPY_SRC | hal::TextureUses::COPY_DST); + let mask_dimension = match view_dim { + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + hal::TextureUses::SAMPLED + } + wgt::TextureViewDimension::D3 => { + hal::TextureUses::SAMPLED + | hal::TextureUses::STORAGE_LOAD + | hal::TextureUses::STORAGE_STORE + } + _ => hal::TextureUses::all(), + }; + let mask_mip_level = if end_layer != desc.range.base_array_layer + 1 { + hal::TextureUses::SAMPLED + } else { + hal::TextureUses::all() + }; + texture.hal_usage & mask_copy & mask_dimension & mask_mip_level + }; + + log::debug!( + "Create view for texture {:?} filters usages to {:?}", + texture_id, + usage + ); let hal_desc = hal::TextureViewDescriptor { label: desc.label.borrow_option(), format, dimension: view_dim, - usage: texture.hal_usage, // pass-through + usage, range: desc.range.clone(), }; @@ -2391,6 +2418,7 @@ impl Device { /// Wait for idle and remove resources that we can, before we die. 
pub(crate) fn prepare_to_die(&mut self) { + self.pending_writes.deactivate(); let mut life_tracker = self.life_tracker.lock(); let current_index = self.active_submission_index; if let Err(error) = unsafe { self.raw.wait(&self.fence, current_index, CLEANUP_WAIT_MS) } { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index a68766a835..577a6fd7e7 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -166,6 +166,15 @@ impl PendingWrites { } &mut self.command_encoder } + + pub fn deactivate(&mut self) { + if self.is_active { + unsafe { + self.command_encoder.discard_encoding(); + } + self.is_active = false; + } + } } #[derive(Default)] @@ -433,10 +442,10 @@ impl Global { } let (texture_guard, _) = hub.textures.read(&mut token); - let (selector, texture_base, texture_format) = + let (selector, dst_base, texture_format) = extract_texture_selector(destination, size, &*texture_guard)?; let format_desc = texture_format.describe(); - validate_linear_texture_data( + let (_, bytes_per_array_layer) = validate_linear_texture_data( data_layout, texture_format, data.len() as wgt::BufferAddress, @@ -495,7 +504,7 @@ impl Global { TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), ); } - let max_image_extent = + let (hal_copy_size, array_layer_count) = validate_texture_copy_range(destination, &dst.desc, CopySide::Destination, size)?; dst.life_guard.use_at(device.active_submission_index + 1); @@ -542,33 +551,29 @@ impl Global { .map_err(DeviceError::from)?; } - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. We want the provided copy size to be no larger than - // the virtual size. - let region = hal::BufferTextureCopy { - buffer_layout: wgt::ImageDataLayout { - offset: 0, - bytes_per_row: NonZeroU32::new(stage_bytes_per_row), - rows_per_image: NonZeroU32::new(block_rows_per_image), - }, - texture_base, - size: wgt::Extent3d { - width: size.width.min(max_image_extent.width), - height: size.height.min(max_image_extent.height), - depth_or_array_layers: size.depth_or_array_layers, - }, - }; - + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: rel_array_layer as u64 * bytes_per_array_layer, + bytes_per_row: NonZeroU32::new(stage_bytes_per_row), + rows_per_image: NonZeroU32::new(block_rows_per_image), + }, + texture_base, + size: hal_copy_size, + } + }); let barrier = hal::BufferBarrier { buffer: &stage.buffer, usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; + let encoder = device.pending_writes.activate(); unsafe { encoder.transition_buffers(iter::once(barrier)); encoder.transition_textures(transition.map(|pending| pending.into_hal(dst))); - encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, iter::once(region)); + encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); } device.pending_writes.consume(stage); diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 1e676e3c9d..5d632211da 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -636,6 +636,14 @@ impl Hub { } } + // destroy command buffers first, since otherwise DX12 isn't happy + for element in self.command_buffers.data.write().map.drain(..) 
{ + if let Element::Occupied(command_buffer, _) = element { + let device = &devices[command_buffer.device_id.value]; + device.destroy_command_buffer(command_buffer); + } + } + for element in self.samplers.data.write().map.drain(..) { if let Element::Occupied(sampler, _) = element { unsafe { @@ -673,12 +681,6 @@ impl Hub { devices[buffer.device_id.value].destroy_buffer(buffer); } } - for element in self.command_buffers.data.write().map.drain(..) { - if let Element::Occupied(command_buffer, _) = element { - let device = &devices[command_buffer.device_id.value]; - device.destroy_command_buffer(command_buffer); - } - } for element in self.bind_groups.data.write().map.drain(..) { if let Element::Occupied(bind_group, _) = element { let device = &devices[bind_group.device_id.value]; @@ -957,7 +959,6 @@ impl HalApi for hal::api::Metal { } } -/* #[cfg(dx12)] impl HalApi for hal::api::Dx12 { const VARIANT: Backend = Backend::Dx12; @@ -969,6 +970,7 @@ impl HalApi for hal::api::Dx12 { } } +/* #[cfg(dx11)] impl HalApi for hal::api::Dx11 { const VARIANT: Backend = Backend::Dx11; diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index ceab234c60..01921870f8 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -110,9 +110,9 @@ impl Instance { #[cfg(metal)] metal: init::(backends), #[cfg(dx12)] - dx12: init(Backend::Dx12, backends), + dx12: init::(backends), #[cfg(dx11)] - dx11: init(Backend::Dx11, backends), + dx11: init::(backends), #[cfg(gl)] gl: init::(backends), } diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 90e91f4462..968eff3e66 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -201,8 +201,8 @@ macro_rules! gfx_select { wgt::Backend::Vulkan => $global.$method::<$crate::api::Vulkan>( $($param),* ), #[cfg(all(not(target_arch = "wasm32"), any(target_os = "ios", target_os = "macos")))] wgt::Backend::Metal => $global.$method::<$crate::api::Metal>( $($param),* ), - //#[cfg(all(not(target_arch = "wasm32"), windows))] - //wgt::Backend::Dx12 => $global.$method::<$crate::api::Dx12>( $($param),* ), + #[cfg(all(not(target_arch = "wasm32"), windows))] + wgt::Backend::Dx12 => $global.$method::<$crate::api::Dx12>( $($param),* ), //#[cfg(all(not(target_arch = "wasm32"), windows))] //wgt::Backend::Dx11 => $global.$method::<$crate::api::Dx11>( $($param),* ), #[cfg(all(not(target_arch = "wasm32"), unix, not(any(target_os = "ios", target_os = "macos"))))] diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs index 8fb2da3699..35cceb253a 100644 --- a/wgpu-core/src/track/buffer.rs +++ b/wgpu-core/src/track/buffer.rs @@ -75,25 +75,6 @@ impl ResourceState for BufferState { Ok(()) } - fn prepend( - &mut self, - id: Valid, - _selector: Self::Selector, - usage: Self::Usage, - ) -> Result<(), PendingTransition> { - match self.first { - Some(old) if old != usage => Err(PendingTransition { - id, - selector: (), - usage: old..usage, - }), - _ => { - self.first = Some(usage); - Ok(()) - } - } - } - fn merge( &mut self, id: Valid, @@ -205,30 +186,4 @@ mod test { } ); } - - #[test] - fn prepend() { - let mut bs = Unit { - first: None, - last: BufferUses::VERTEX, - }; - let id = Id::dummy(); - bs.prepend(id, (), BufferUses::INDEX).unwrap(); - bs.prepend(id, (), BufferUses::INDEX).unwrap(); - assert_eq!( - bs.prepend(id, (), BufferUses::STORAGE_LOAD), - Err(PendingTransition { - id, - selector: (), - usage: BufferUses::INDEX..BufferUses::STORAGE_LOAD, - }) - ); - assert_eq!( - bs, - Unit { - first: Some(BufferUses::INDEX), - last: 
BufferUses::VERTEX, - } - ); - } } diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 5d6af42d8a..ce5e70a85c 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -76,14 +76,6 @@ pub(crate) trait ResourceState: Clone + Default { output: Option<&mut Vec>>, ) -> Result<(), PendingTransition>; - /// Sets up the first usage of the selected sub-resources. - fn prepend( - &mut self, - id: Valid, - selector: Self::Selector, - usage: Self::Usage, - ) -> Result<(), PendingTransition>; - /// Merge the state of this resource tracked by a different instance /// with the current one. /// @@ -309,6 +301,7 @@ impl ResourceTracker { /// /// Returns `Some(Usage)` only if this usage is consistent /// across the given selector. + #[allow(unused)] // TODO: figure out if this needs to be removed pub fn query(&self, id: Valid, selector: S::Selector) -> Option { let (index, epoch, backend) = id.0.unzip(); debug_assert_eq!(backend, self.backend); @@ -397,21 +390,6 @@ impl ResourceTracker { self.temp.drain(..) } - /// Turn the tracking from the "expand" mode into the "replace" one, - /// installing the selected usage as the "first". - /// This is a special operation only used by the render pass attachments. - pub(crate) fn prepend( - &mut self, - id: Valid, - ref_count: &RefCount, - selector: S::Selector, - usage: S::Usage, - ) -> Result<(), PendingTransition> { - Self::get_or_insert(self.backend, &mut self.map, id, ref_count) - .state - .prepend(id, selector, usage) - } - /// Merge another tracker into `self` by extending the current states /// without any transitions. pub(crate) fn merge_extend(&mut self, other: &Self) -> Result<(), PendingTransition> { @@ -528,15 +506,6 @@ impl ResourceState for PhantomData { Ok(()) } - fn prepend( - &mut self, - _id: Valid, - _selector: Self::Selector, - _usage: Self::Usage, - ) -> Result<(), PendingTransition> { - Ok(()) - } - fn merge( &mut self, _id: Valid, diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index 84e3ae7499..d3fe33528f 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -136,40 +136,6 @@ impl ResourceState for TextureState { Ok(()) } - fn prepend( - &mut self, - id: Valid, - selector: Self::Selector, - usage: Self::Usage, - ) -> Result<(), PendingTransition> { - assert!(self.mips.len() >= selector.levels.end as usize); - for (mip_id, mip) in self.mips[selector.levels.start as usize..selector.levels.end as usize] - .iter_mut() - .enumerate() - { - let level = selector.levels.start + mip_id as u32; - let layers = mip.isolate(&selector.layers, Unit::new(usage)); - for &mut (ref range, ref mut unit) in layers { - match unit.first { - Some(old) if old != usage => { - return Err(PendingTransition { - id, - selector: TextureSelector { - levels: level..level + 1, - layers: range.clone(), - }, - usage: old..usage, - }); - } - _ => { - unit.first = Some(usage); - } - } - } - } - Ok(()) - } - fn merge( &mut self, id: Valid, diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 17108e824d..dd968b33fe 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -16,6 +16,7 @@ default = [] metal = ["naga/msl-out", "block", "foreign-types"] vulkan = ["naga/spv-out", "ash", "gpu-alloc", "gpu-descriptor", "libloading", "inplace_it", "renderdoc-sys"] gles = ["naga/glsl-out", "glow", "egl", "libloading"] +dx12 = ["naga/hlsl-out", "native", "bit-set", "range-alloc", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] [dependencies] 
bitflags = "1.0" @@ -39,6 +40,10 @@ inplace_it = { version ="0.3.3", optional = true } renderdoc-sys = { version = "0.7.1", optional = true } # backend: Gles glow = { git = "https://github.com/grovesNL/glow", rev = "0864897a28bbdd43f89f4fd8fdd4ed781b719f8a", optional = true } +# backend: Dx12 +bit-set = { version = "0.5", optional = true } +native = { package = "d3d12", version = "0.4", features = ["libloading"], optional = true } +range-alloc = { version = "0.1", optional = true } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] egl = { package = "khronos-egl", version = "4.1", features = ["dynamic"], optional = true } @@ -54,11 +59,11 @@ core-graphics-types = "0.1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" [dev-dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" features = ["wgsl-in"] [dev-dependencies] diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index e738ea892e..9e762bb3a8 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -302,9 +302,14 @@ impl Example { texture_base: hal::TextureCopyBase { origin: wgt::Origin3d::ZERO, mip_level: 0, + array_layer: 0, aspect: hal::FormatAspects::COLOR, }, - size: texture_desc.size, + size: hal::CopyExtent { + width: 1, + height: 1, + depth: 1, + }, }; unsafe { cmd_encoder.transition_buffers(iter::once(buffer_barrier)); @@ -588,11 +593,18 @@ impl Example { let ctx = &mut self.contexts[self.context_index]; + let surface_tex = unsafe { self.surface.acquire_texture(!0).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COLOR_TARGET, + }; unsafe { ctx.encoder.begin_encoding(Some("frame")).unwrap(); + ctx.encoder.transition_textures(iter::once(target_barrier0)); } - let surface_tex = unsafe { self.surface.acquire_texture(!0).unwrap().unwrap().texture }; let surface_view_desc = hal::TextureViewDescriptor { label: None, format: self.surface_format, @@ -617,7 +629,6 @@ impl Example { target: hal::Attachment { view: &surface_tex_view, usage: hal::TextureUses::COLOR_TARGET, - boundary_usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::empty(), }, resolve_target: None, ops: hal::AttachmentOps::STORE, @@ -650,8 +661,17 @@ impl Example { ctx.frames_recorded += 1; let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COLOR_TARGET..hal::TextureUses::empty(), + }; unsafe { ctx.encoder.end_render_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + } + + unsafe { let cmd_buf = ctx.encoder.end_encoding().unwrap(); let fence_param = if do_fence { Some((&mut ctx.fence, ctx.fence_value)) @@ -699,7 +719,19 @@ type Api = hal::api::Metal; type Api = hal::api::Vulkan; #[cfg(all(feature = "gles", not(feature = "metal"), not(feature = "vulkan")))] type Api = hal::api::Gles; -#[cfg(not(any(feature = "metal", feature = "vulkan", feature = "gles")))] +#[cfg(all( + feature = "dx12", + not(feature = "metal"), + not(feature = "vulkan"), + not(feature = "gles") +))] +type Api = hal::api::Dx12; +#[cfg(not(any( + feature = "metal", + feature = "vulkan", + feature = "gles", + feature = "dx12" +)))] 
type Api = hal::api::Empty; fn main() { diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs new file mode 100644 index 0000000000..040e6083f5 --- /dev/null +++ b/wgpu-hal/src/dx12/adapter.rs @@ -0,0 +1,390 @@ +use super::{conv, HResult as _}; +use std::{mem, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_5, minwindef, windef, winerror}, + um::{d3d12, winuser}, +}; + +impl Drop for super::Adapter { + fn drop(&mut self) { + unsafe { + self.raw.destroy(); + } + } +} + +impl super::Adapter { + #[allow(trivial_casts)] + pub(super) fn expose( + adapter: native::WeakPtr, + library: &Arc, + instance_flags: crate::InstanceFlags, + ) -> Option> { + // Create the device so that we can get the capabilities. + let device = match library.create_device(adapter, native::FeatureLevel::L11_0) { + Ok(pair) => match pair.into_result() { + Ok(device) => device, + Err(err) => { + log::warn!("Device creation failed: {}", err); + return None; + } + }, + Err(err) => { + log::warn!("Device creation function is not found: {:?}", err); + return None; + } + }; + + // We have found a possible adapter. + // Acquire the device information. + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.GetDesc2(&mut desc); + } + + let device_name = { + use std::{ffi::OsString, os::windows::ffi::OsStringExt}; + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = OsString::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + let mut features_architecture: d3d12::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::() as _, + ) + }); + + let mut workarounds = super::Workarounds::default(); + + let info = wgt::AdapterInfo { + backend: wgt::Backend::Dx12, + name: device_name, + vendor: desc.VendorId as usize, + device: desc.DeviceId as usize, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + wgt::DeviceType::VirtualGpu + } else if features_architecture.CacheCoherentUMA != 0 { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + }, + }; + + let mut options: d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_D3D12_OPTIONS, + &mut options as *mut _ as *mut _, + mem::size_of::() as _, + ) + }); + + let _depth_bounds_test_supported = { + let mut features2: d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::() as _, + ) + }; + hr == 0 && features2.DepthBoundsTestSupported != 0 + }; + + let private_caps = super::PrivateCapabilities { + heterogeneous_resource_heaps: options.ResourceHeapTier + != d3d12::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA != 0 { + super::MemoryArchitecture::Unified { + cache_coherent: features_architecture.CacheCoherentUMA != 0, + } + } else { + super::MemoryArchitecture::NonUnified + }, + shader_debug_info: instance_flags.contains(crate::InstanceFlags::DEBUG), + heap_create_not_zeroed: false, //TODO: winapi support for Options7 + }; + + // Theoretically vram limited, but in practice 2^20 is the limit + let tier3_practical_descriptor_limit 
= 1 << 20; + + let (full_heap_count, _uav_count) = match options.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => ( + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, // conservative, is 64 on feature level 11.1 + ), + d3d12::D3D12_RESOURCE_BINDING_TIER_2 => ( + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, + 64, + ), + d3d12::D3D12_RESOURCE_BINDING_TIER_3 => ( + tier3_practical_descriptor_limit, + tier3_practical_descriptor_limit, + ), + other => { + log::warn!("Unknown resource binding tier {}", other); + ( + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, + ) + } + }; + + let mut features = wgt::Features::empty() + | wgt::Features::DEPTH_CLAMPING + | wgt::Features::MAPPABLE_PRIMARY_BUFFERS + //TODO: Naga part + //| wgt::Features::TEXTURE_BINDING_ARRAY + //| wgt::Features::BUFFER_BINDING_ARRAY + //| wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + //| wgt::Features::UNSIZED_BINDING_ARRAY + | wgt::Features::MULTI_DRAW_INDIRECT + | wgt::Features::MULTI_DRAW_INDIRECT_COUNT + | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER + | wgt::Features::NON_FILL_POLYGON_MODE + | wgt::Features::VERTEX_WRITABLE_STORAGE + | wgt::Features::TIMESTAMP_QUERY + | wgt::Features::PIPELINE_STATISTICS_QUERY; + + features.set( + wgt::Features::CONSERVATIVE_RASTERIZATION, + options.ConservativeRasterizationTier + != d3d12::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + ); + + let base = wgt::Limits::default(); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + raw: adapter, + device, + library: Arc::clone(library), + private_caps, + workarounds, + }, + info, + features, + capabilities: crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: d3d12::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(d3d12::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: d3d12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: d3d12::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + // dynamic offsets take a root constant, so we expose the minimum here + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 128, + _ => full_heap_count, + }, + max_samplers_per_shader_stage: match options.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 16, + _ => d3d12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + }, + // these both account towards `uav_count`, but we can't express the limit as as sum + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base + .max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage: full_heap_count, + max_uniform_buffer_binding_size: d3d12::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT + * 16, + max_storage_buffer_binding_size: !0, + max_vertex_buffers: d3d12::D3D12_VS_INPUT_REGISTER_COUNT + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: d3d12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + max_push_constant_size: 0, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new( + 
d3d12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new( + d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + ) + .unwrap(), + uniform_buffer_offset: wgt::BufferSize::new( + d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + storage_buffer_offset: wgt::BufferSize::new(4).unwrap(), //TODO? + }, + downlevel: wgt::DownlevelCapabilities::default(), + }, + }) + } +} + +impl crate::Adapter for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + ) -> Result, crate::DeviceError> { + let queue = self + .device + .create_command_queue( + native::CmdListType::Direct, + native::Priority::Normal, + native::CommandQueueFlags::empty(), + 0, + ) + .into_device_result("Queue creation")?; + + let device = super::Device::new( + self.device, + queue, + features, + self.private_caps, + &self.library, + )?; + Ok(crate::OpenDevice { + device, + queue: super::Queue { + raw: queue, + temp_lists: Vec::new(), + }, + }) + } + + #[allow(trivial_casts)] + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let raw_format = conv::map_texture_format(format); + let mut data = d3d12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: raw_format, + Support1: mem::zeroed(), + Support2: mem::zeroed(), + }; + assert_eq!( + winerror::S_OK, + self.device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::() as _, + ) + ); + + let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; + let can_image = 0 + != data.Support1 + & (d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURECUBE); + caps.set(Tfc::SAMPLED, can_image); + caps.set( + Tfc::SAMPLED_LINEAR, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT_BLEND, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + ); + caps.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + ); + caps.set( + Tfc::STORAGE, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW != 0, + ); + caps.set( + Tfc::STORAGE_READ_WRITE, + data.Support2 & d3d12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + ); + + caps + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option { + let current_extent = { + let mut rect: windef::RECT = mem::zeroed(); + if winuser::GetClientRect(surface.wnd_handle, &mut rect) != 0 { + Some(wgt::Extent3d { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + depth_or_array_layers: 1, + }) + } else { + log::warn!("Unable to get the window client rect"); + None + } + }; + + let mut present_modes = vec![wgt::PresentMode::Fifo]; + #[allow(trivial_casts)] + if let Ok(factory5) = surface + .factory + .cast::() + .into_result() + { + let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let hr = factory5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::() as _, + ); + + factory5.destroy(); + match hr.into_result() { + Err(err) => log::warn!("Unable to check for tearing support: {}", err), + Ok(()) => 
present_modes.push(wgt::PresentMode::Immediate), + } + } + + Some(crate::SurfaceCapabilities { + formats: vec![ + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Rgba8UnormSrgb, + wgt::TextureFormat::Rgba8Unorm, + wgt::TextureFormat::Rgb10a2Unorm, + wgt::TextureFormat::Rgba16Float, + ], + // we currently use a flip effect which supports 2..=16 buffers + swap_chain_sizes: 2..=16, + current_extent, + // TODO: figure out the exact bounds + extents: wgt::Extent3d { + width: 16, + height: 16, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: 4096, + height: 4096, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + present_modes, + composite_alpha_modes: vec![ + crate::CompositeAlphaMode::Opaque, + crate::CompositeAlphaMode::PreMultiplied, + crate::CompositeAlphaMode::PostMultiplied, + ], + }) + } +} diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs new file mode 100644 index 0000000000..9f3ea39e98 --- /dev/null +++ b/wgpu-hal/src/dx12/command.rs @@ -0,0 +1,843 @@ +use super::{conv, HResult as _}; +use std::{mem, ops::Range, ptr}; +use winapi::um::d3d12; + +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12::D3D12_BOX { + d3d12::D3D12_BOX { + left: origin.x, + top: origin.y, + right: origin.x + size.width, + bottom: origin.y + size.height, + front: origin.z, + back: origin.z + size.depth, + } +} + +impl super::Temp { + fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + self.marker.clear(); + self.marker.extend(marker.encode_utf16()); + self.marker.push(0); + (&self.marker, self.marker.len() as u32 * 2) + } +} + +impl super::CommandEncoder { + unsafe fn begin_pass(&mut self, kind: super::PassKind, label: crate::Label) { + let list = self.list.unwrap(); + self.pass.kind = kind; + if let Some(label) = label { + let (wide_label, size) = self.temp.prepare_marker(label); + list.BeginEvent(0, wide_label.as_ptr() as *const _, size); + self.pass.has_label = true; + } + list.set_descriptor_heaps(&[self.shared.heap_views.raw, self.shared.heap_samplers.raw]); + } + + unsafe fn end_pass(&mut self) { + let list = self.list.unwrap(); + list.set_descriptor_heaps(&[]); + if self.pass.has_label { + list.EndEvent(); + } + self.pass.clear(); + } + + unsafe fn prepare_draw(&mut self) { + let list = self.list.unwrap(); + while self.pass.dirty_vertex_buffers != 0 { + let index = self.pass.dirty_vertex_buffers.trailing_zeros(); + self.pass.dirty_vertex_buffers ^= 1 << index; + list.IASetVertexBuffers( + index, + 1, + self.pass.vertex_buffers.as_ptr().offset(index as isize), + ); + } + } +} + +impl crate::CommandEncoder for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let list = match self.free_lists.pop() { + Some(list) => { + list.reset(self.allocator, native::PipelineState::null()); + list + } + None => self + .device + .create_graphics_command_list( + native::CmdListType::Direct, + self.allocator, + native::PipelineState::null(), + 0, + ) + .into_device_result("Create command list")?, + }; + + if let Some(label) = label { + let cwstr = conv::map_label(label); + list.SetName(cwstr.as_ptr()); + } + + self.list = Some(list); + self.temp.clear(); + self.pass.clear(); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + if let Some(list) = self.list.take() { + list.close(); + self.free_lists.push(list); + } + } + unsafe fn end_encoding(&mut 
self) -> Result { + let raw = self.list.take().unwrap(); + raw.close(); + Ok(super::CommandBuffer { raw }) + } + unsafe fn reset_all>(&mut self, command_buffers: I) { + for cmd_buf in command_buffers { + self.free_lists.push(cmd_buf.raw); + } + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator>, + { + self.temp.barriers.clear(); + + for barrier in barriers { + let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); + let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + }; + self.temp.barriers.push(raw); + } else if barrier.usage.start == crate::BufferUses::STORAGE_STORE { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.UAV_mut() = d3d12::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator>, + { + self.temp.barriers.clear(); + + for barrier in barriers { + let s0 = conv::map_texture_usage_to_state(barrier.usage.start); + let s1 = conv::map_texture_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + }; + + let mip_level_count = match barrier.range.mip_level_count { + Some(count) => count.get(), + None => barrier.texture.mip_level_count - barrier.range.base_mip_level, + }; + let array_layer_count = match barrier.range.array_layer_count { + Some(count) => count.get(), + None => barrier.texture.array_layer_count() - barrier.range.base_array_layer, + }; + + if barrier.range.aspect == wgt::TextureAspect::All + && barrier.range.base_mip_level + mip_level_count + == barrier.texture.mip_level_count + && barrier.range.base_array_layer + array_layer_count + == barrier.texture.array_layer_count() + { + // Only one barrier if it affects the whole image. + self.temp.barriers.push(raw); + } else { + // Generate barrier for each layer/level combination. 
+ for rel_mip_level in 0..mip_level_count { + for rel_array_layer in 0..array_layer_count { + raw.u.Transition_mut().Subresource = barrier.texture.calc_subresource( + barrier.range.base_mip_level + rel_mip_level, + barrier.range.base_array_layer + rel_array_layer, + 0, + ); + self.temp.barriers.push(raw); + } + } + } + } else if barrier.usage.start == crate::TextureUses::STORAGE_STORE { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.UAV_mut() = d3d12::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + } + } + + unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) { + assert_eq!(value, 0, "Only zero is supported!"); + let list = self.list.unwrap(); + let mut offset = range.start; + while offset < range.end { + let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); + list.CopyBufferRegion( + buffer.resource.as_mut_ptr(), + offset, + self.shared.zero_buffer.as_mut_ptr(), + 0, + size, + ); + offset += size; + } + } + + unsafe fn copy_buffer_to_buffer( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator, + { + let list = self.list.unwrap(); + for r in regions { + list.CopyBufferRegion( + dst.resource.as_mut_ptr(), + r.dst_offset, + src.resource.as_mut_ptr(), + r.src_offset, + r.size.get(), + ); + } + } + + unsafe fn copy_texture_to_texture( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator, + { + let list = self.list.unwrap(); + let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + + for r in regions { + let src_box = make_box(&r.src_base.origin, &r.size); + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base); + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.dst_base); + + list.CopyTextureRegion( + &dst_location, + r.dst_base.origin.x, + r.dst_base.origin.y, + r.dst_base.origin.z, + &src_location, + &src_box, + ); + } + } + + unsafe fn copy_buffer_to_texture( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator, + { + let list = self.list.unwrap(); + let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: mem::zeroed(), + }; + let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let raw_format = conv::map_texture_format(dst.format); + + for r in regions { + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + *src_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: r.buffer_layout.offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: raw_format, + Width: r.size.width, + 
Height: r + .buffer_layout + .rows_per_image + .map_or(r.size.height, |count| count.get()), + Depth: r.size.depth, + RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| { + count.get().max(d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }), + }, + }; + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.texture_base); + + list.CopyTextureRegion( + &dst_location, + r.texture_base.origin.x, + r.texture_base.origin.y, + r.texture_base.origin.z, + &src_location, + &src_box, + ); + } + } + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator, + { + let list = self.list.unwrap(); + let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: mem::zeroed(), + }; + let raw_format = conv::map_texture_format(src.format); + + for r in regions { + let src_box = make_box(&r.texture_base.origin, &r.size); + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.texture_base); + *dst_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: r.buffer_layout.offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: raw_format, + Width: r.size.width, + Height: r + .buffer_layout + .rows_per_image + .map_or(r.size.height, |count| count.get()), + Depth: r.size.depth, + RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| count.get()), + }, + }; + + list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + self.list + .unwrap() + .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index); + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + self.list + .unwrap() + .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index); + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + self.list.unwrap().EndQuery( + set.raw.as_mut_ptr(), + d3d12::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ); + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range) { + // nothing to do here + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + self.list.unwrap().ResolveQueryData( + set.raw.as_mut_ptr(), + set.raw_ty, + range.start, + range.end - range.start, + buffer.resource.as_mut_ptr(), + offset, + ); + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + self.begin_pass(super::PassKind::Render, desc.label); + + let mut color_views = [native::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_TARGETS]; + for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + *rtv = cat.target.view.handle_rtv.unwrap().raw; + } + let ds_view = match desc.depth_stencil_attachment { + None => ptr::null(), + Some(ref ds) => { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw + } + } + }; + + let list = self.list.unwrap(); + list.OMSetRenderTargets( + desc.color_attachments.len() as u32, + 
color_views.as_ptr(), + 0, + ds_view, + ); + + self.pass.resolves.clear(); + for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let value = [ + cat.clear_value.r as f32, + cat.clear_value.g as f32, + cat.clear_value.b as f32, + cat.clear_value.a as f32, + ]; + list.clear_render_target_view(*rtv, value, &[]); + } + if let Some(ref target) = cat.resolve_target { + self.pass.resolves.push(super::PassResolve { + src: cat.target.view.target_base, + dst: target.view.target_base, + format: target.view.raw_format, + }); + } + } + if let Some(ref ds) = desc.depth_stencil_attachment { + let mut flags = native::ClearFlags::empty(); + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) { + flags |= native::ClearFlags::DEPTH; + } + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) { + flags |= native::ClearFlags::STENCIL; + } + + if !ds_view.is_null() { + list.clear_depth_stencil_view( + *ds_view, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ); + } + } + } + unsafe fn end_render_pass(&mut self) { + if !self.pass.resolves.is_empty() { + let list = self.list.unwrap(); + self.temp.barriers.clear(); + + // All the targets are expected to be in `COLOR_TARGET` state, + // but D3D12 has special source/destination states for the resolves. + for resolve in self.pass.resolves.iter() { + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResove`. + *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.src.0.as_mut_ptr(), + Subresource: resolve.src.1, + StateBefore: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + }; + self.temp.barriers.push(barrier); + *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.dst.0.as_mut_ptr(), + Subresource: resolve.dst.1, + StateBefore: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + }; + self.temp.barriers.push(barrier); + } + list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + + for resolve in self.pass.resolves.iter() { + list.ResolveSubresource( + resolve.dst.0.as_mut_ptr(), + resolve.dst.1, + resolve.src.0.as_mut_ptr(), + resolve.src.1, + resolve.format, + ); + } + + // Flip all the barriers to reverse, back into `COLOR_TARGET`. 
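The reversal below reuses the barrier array already recorded for the resolve and just swaps each before/after state in place, rather than rebuilding the list. A plain-Rust sketch of that pattern, with a toy `Transition` struct standing in for the D3D12 transition barrier:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct Transition {
    before: u32, // stand-in for StateBefore
    after: u32,  // stand-in for StateAfter
}

fn main() {
    // Barriers recorded to enter the resolve source/dest states...
    let mut barriers = vec![
        Transition { before: 1, after: 2 },
        Transition { before: 1, after: 3 },
    ];
    // ...are flipped in place to transition everything back afterwards.
    for b in barriers.iter_mut() {
        std::mem::swap(&mut b.before, &mut b.after);
    }
    assert_eq!(barriers[0], Transition { before: 2, after: 1 });
}
```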
+ for barrier in self.temp.barriers.iter_mut() { + let transition = barrier.u.Transition_mut(); + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + } + + self.end_pass(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + use super::PassKind as Pk; + + let list = self.list.unwrap(); + let info = &layout.bind_group_infos[index as usize]; + let mut root_index = info.base_root_index; + + // Bind CBV/SRC/UAV descriptor tables + if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + let descriptor = group.handle_views.unwrap().gpu; + match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor), + Pk::Transfer => (), + } + root_index += 1; + } + + // Bind Sampler descriptor tables. + if info.tables.contains(super::TableTypes::SAMPLERS) { + let descriptor = group.handle_samplers.unwrap().gpu; + match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor), + Pk::Transfer => (), + } + root_index += 1; + } + + // Bind root descriptors + for ((kind, &gpu_base), &offset) in info + .dynamic_buffers + .iter() + .zip(group.dynamic_buffers.iter()) + .zip(dynamic_offsets) + { + let gpu_address = gpu_base + offset as wgt::BufferAddress; + match self.pass.kind { + Pk::Render => match *kind { + super::BufferViewKind::Constant => { + list.set_graphics_root_constant_buffer_view(root_index, gpu_address) + } + super::BufferViewKind::ShaderResource => { + list.set_graphics_root_shader_resource_view(root_index, gpu_address) + } + super::BufferViewKind::UnorderedAccess => { + list.set_graphics_root_unordered_access_view(root_index, gpu_address) + } + }, + Pk::Compute => match *kind { + super::BufferViewKind::Constant => { + list.set_compute_root_constant_buffer_view(root_index, gpu_address) + } + super::BufferViewKind::ShaderResource => { + list.set_compute_root_shader_resource_view(root_index, gpu_address) + } + super::BufferViewKind::UnorderedAccess => { + list.set_compute_root_unordered_access_view(root_index, gpu_address) + } + }, + Pk::Transfer => (), + } + } + } + unsafe fn set_push_constants( + &mut self, + _layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + _offset: u32, + _data: &[u32], + ) { + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let (wide_label, size) = self.temp.prepare_marker(label); + self.list + .unwrap() + .SetMarker(0, wide_label.as_ptr() as *const _, size); + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let (wide_label, size) = self.temp.prepare_marker(group_label); + self.list + .unwrap() + .BeginEvent(0, wide_label.as_ptr() as *const _, size); + } + unsafe fn end_debug_marker(&mut self) { + self.list.unwrap().EndEvent() + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.unwrap(); + + list.set_graphics_root_signature(pipeline.signature); + list.set_pipeline_state(pipeline.raw); + list.IASetPrimitiveTopology(pipeline.topology); + + //TODO: root signature changes require full layout rebind! 
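The stride update that follows records changed vertex-buffer slots in a dirty bitmask (`dirty_vertex_buffers`) so the bindings can be refreshed lazily before the next draw. A self-contained sketch of that dirty-bit pattern; the slot count and helper names here are illustrative, not the backend's actual types:

```rust
const MAX_VERTEX_BUFFERS: usize = 16; // assumed slot count for illustration

struct VertexState {
    strides: [u32; MAX_VERTEX_BUFFERS],
    dirty: u32, // bit i set => slot i must be re-bound before drawing
}

impl VertexState {
    fn set_stride(&mut self, slot: usize, stride: u32) {
        if self.strides[slot] != stride {
            self.strides[slot] = stride;
            self.dirty |= 1 << slot;
        }
    }

    /// Re-bind only the slots that changed, then clear the mask.
    fn flush(&mut self, mut bind: impl FnMut(usize, u32)) {
        while self.dirty != 0 {
            let slot = self.dirty.trailing_zeros() as usize;
            self.dirty &= self.dirty - 1; // clear the lowest set bit
            bind(slot, self.strides[slot]);
        }
    }
}

fn main() {
    let mut vs = VertexState { strides: [0; MAX_VERTEX_BUFFERS], dirty: 0 };
    vs.set_stride(2, 24);
    vs.flush(|slot, stride| println!("re-bind slot {slot} with stride {stride}"));
}
```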
+ + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.list.unwrap().set_index_buffer( + binding.resolve_address(), + binding.resolve_size() as u32, + conv::map_index_format(format), + ); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vb = &mut self.pass.vertex_buffers[index as usize]; + vb.BufferLocation = binding.resolve_address(); + vb.SizeInBytes = binding.resolve_size() as u32; + self.pass.dirty_vertex_buffers |= 1 << index; + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) { + let raw_vp = d3d12::D3D12_VIEWPORT { + TopLeftX: rect.x, + TopLeftY: rect.y, + Width: rect.w, + Height: rect.h, + MinDepth: depth_range.start, + MaxDepth: depth_range.end, + }; + self.list.unwrap().RSSetViewports(1, &raw_vp); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) { + let raw_rect = d3d12::D3D12_RECT { + left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + }; + self.list.unwrap().RSSetScissorRects(1, &raw_rect); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.list.unwrap().set_stencil_reference(value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.list.unwrap().set_blend_factor(*color); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(); + self.list + .unwrap() + .draw(vertex_count, instance_count, start_vertex, start_instance); + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(); + self.list.unwrap().draw_indexed( + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(); + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(); + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + self.prepare_draw(); + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ); + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + 
self.prepare_draw(); + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ); + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + self.begin_pass(super::PassKind::Compute, desc.label); + } + unsafe fn end_compute_pass(&mut self) { + self.end_pass(); + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.unwrap(); + + list.set_compute_root_signature(pipeline.signature); + list.set_pipeline_state(pipeline.raw); + + //TODO: root signature changes require full layout rebind! + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.list.unwrap().dispatch(count); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.dispatch.as_mut_ptr(), + 1, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); + } +} diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs new file mode 100644 index 0000000000..cfe11a638d --- /dev/null +++ b/wgpu-hal/src/dx12/conv.rs @@ -0,0 +1,530 @@ +use std::iter; +use winapi::{ + shared::{dxgi1_2, dxgiformat}, + um::{d3d12, d3dcommon}, +}; + +pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + match format { + Tf::R8Unorm => DXGI_FORMAT_R8_UNORM, + Tf::R8Snorm => DXGI_FORMAT_R8_SNORM, + Tf::R8Uint => DXGI_FORMAT_R8_UINT, + Tf::R8Sint => DXGI_FORMAT_R8_SINT, + Tf::R16Uint => DXGI_FORMAT_R16_UINT, + Tf::R16Sint => DXGI_FORMAT_R16_SINT, + Tf::R16Float => DXGI_FORMAT_R16_FLOAT, + Tf::Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + Tf::Rg8Snorm => DXGI_FORMAT_R8G8_SNORM, + Tf::Rg8Uint => DXGI_FORMAT_R8G8_UINT, + Tf::Rg8Sint => DXGI_FORMAT_R8G8_SINT, + Tf::R32Uint => DXGI_FORMAT_R32_UINT, + Tf::R32Sint => DXGI_FORMAT_R32_SINT, + Tf::R32Float => DXGI_FORMAT_R32_FLOAT, + Tf::Rg16Uint => DXGI_FORMAT_R16G16_UINT, + Tf::Rg16Sint => DXGI_FORMAT_R16G16_SINT, + Tf::Rg16Float => DXGI_FORMAT_R16G16_FLOAT, + Tf::Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + Tf::Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM, + Tf::Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + Tf::Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT, + Tf::Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT, + Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, + Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, + Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, + Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, + Tf::Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT, + Tf::Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT, + Tf::Rgba16Float => DXGI_FORMAT_R16G16B16A16_FLOAT, + Tf::Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT, + Tf::Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT, + Tf::Rgba32Float => DXGI_FORMAT_R32G32B32A32_FLOAT, + Tf::Depth32Float => DXGI_FORMAT_D32_FLOAT, + Tf::Depth24Plus => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth24PlusStencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM, + Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_UNORM_SRGB, + Tf::Bc2RgbaUnorm => DXGI_FORMAT_BC2_UNORM, + Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Tf::Bc3RgbaUnorm => DXGI_FORMAT_BC3_UNORM, + 
Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Tf::Bc4RUnorm => DXGI_FORMAT_BC4_UNORM, + Tf::Bc4RSnorm => DXGI_FORMAT_BC4_SNORM, + Tf::Bc5RgUnorm => DXGI_FORMAT_BC5_UNORM, + Tf::Bc5RgSnorm => DXGI_FORMAT_BC5_SNORM, + Tf::Bc6hRgbUfloat => DXGI_FORMAT_BC6H_UF16, + Tf::Bc6hRgbSfloat => DXGI_FORMAT_BC6H_SF16, + Tf::Bc7RgbaUnorm => DXGI_FORMAT_BC7_UNORM, + Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_UNORM_SRGB, + Tf::Etc2RgbUnorm + | Tf::Etc2RgbUnormSrgb + | Tf::Etc2RgbA1Unorm + | Tf::Etc2RgbA1UnormSrgb + | Tf::EacRUnorm + | Tf::EacRSnorm + | Tf::EacRgUnorm + | Tf::EacRgSnorm + | Tf::Astc4x4RgbaUnorm + | Tf::Astc4x4RgbaUnormSrgb + | Tf::Astc5x4RgbaUnorm + | Tf::Astc5x4RgbaUnormSrgb + | Tf::Astc5x5RgbaUnorm + | Tf::Astc5x5RgbaUnormSrgb + | Tf::Astc6x5RgbaUnorm + | Tf::Astc6x5RgbaUnormSrgb + | Tf::Astc6x6RgbaUnorm + | Tf::Astc6x6RgbaUnormSrgb + | Tf::Astc8x5RgbaUnorm + | Tf::Astc8x5RgbaUnormSrgb + | Tf::Astc8x6RgbaUnorm + | Tf::Astc8x6RgbaUnormSrgb + | Tf::Astc10x5RgbaUnorm + | Tf::Astc10x5RgbaUnormSrgb + | Tf::Astc10x6RgbaUnorm + | Tf::Astc10x6RgbaUnormSrgb + | Tf::Astc8x8RgbaUnorm + | Tf::Astc8x8RgbaUnormSrgb + | Tf::Astc10x8RgbaUnorm + | Tf::Astc10x8RgbaUnormSrgb + | Tf::Astc10x10RgbaUnorm + | Tf::Astc10x10RgbaUnormSrgb + | Tf::Astc12x10RgbaUnorm + | Tf::Astc12x10RgbaUnormSrgb + | Tf::Astc12x12RgbaUnorm + | Tf::Astc12x12RgbaUnormSrgb => unreachable!(), + } +} + +//Note: DXGI doesn't allow sRGB format on the swapchain, +// but creating RTV of swapchain buffers with sRGB works. +pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, + wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + _ => map_texture_format(format), + } +} + +//Note: SRV and UAV can't use the depth formats directly +//TODO: stencil views? 
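The function defined below substitutes color-typed view formats for depth formats when a texture is going to be read through an SRV or UAV, and passes every other format through after asserting it is color-only. A simplified stand-in for that substitution rule; the enum and the string outputs are placeholders, not the real `wgt`/DXGI types:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum Format {
    Rgba8Unorm,
    Depth32Float,
    Depth24PlusStencil8,
}

/// SRVs/UAVs cannot use depth-typed formats directly, so pick the
/// compatible color-typed view format instead.
fn format_for_shader_view(format: Format) -> &'static str {
    match format {
        Format::Depth32Float => "R32_FLOAT",
        Format::Depth24PlusStencil8 => "R24_UNORM_X8_TYPELESS",
        Format::Rgba8Unorm => "R8G8B8A8_UNORM",
    }
}

fn main() {
    assert_eq!(format_for_shader_view(Format::Depth32Float), "R32_FLOAT");
}
```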
+pub fn map_texture_format_nodepth(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Depth32Float => dxgiformat::DXGI_FORMAT_R32_FLOAT, + wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8 => { + dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS + } + _ => { + assert_eq!( + crate::FormatAspects::from(format), + crate::FormatAspects::COLOR + ); + map_texture_format(format) + } + } +} + +pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, + wgt::IndexFormat::Uint32 => dxgiformat::DXGI_FORMAT_R32_UINT, + } +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::VertexFormat as Vf; + use winapi::shared::dxgiformat::*; + + match format { + Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, + Vf::Snorm8x2 => DXGI_FORMAT_R8G8_SNORM, + Vf::Uint8x2 => DXGI_FORMAT_R8G8_UINT, + Vf::Sint8x2 => DXGI_FORMAT_R8G8_SINT, + Vf::Unorm8x4 => DXGI_FORMAT_R8G8B8A8_UNORM, + Vf::Snorm8x4 => DXGI_FORMAT_R8G8B8A8_SNORM, + Vf::Uint8x4 => DXGI_FORMAT_R8G8B8A8_UINT, + Vf::Sint8x4 => DXGI_FORMAT_R8G8B8A8_SINT, + Vf::Unorm16x2 => DXGI_FORMAT_R16G16_UNORM, + Vf::Snorm16x2 => DXGI_FORMAT_R16G16_SNORM, + Vf::Uint16x2 => DXGI_FORMAT_R16G16_UINT, + Vf::Sint16x2 => DXGI_FORMAT_R16G16_SINT, + Vf::Float16x2 => DXGI_FORMAT_R16G16_FLOAT, + Vf::Unorm16x4 => DXGI_FORMAT_R16G16B16A16_UNORM, + Vf::Snorm16x4 => DXGI_FORMAT_R16G16B16A16_SNORM, + Vf::Uint16x4 => DXGI_FORMAT_R16G16B16A16_UINT, + Vf::Sint16x4 => DXGI_FORMAT_R16G16B16A16_SINT, + Vf::Float16x4 => DXGI_FORMAT_R16G16B16A16_FLOAT, + Vf::Uint32 => DXGI_FORMAT_R32_UINT, + Vf::Sint32 => DXGI_FORMAT_R32_SINT, + Vf::Float32 => DXGI_FORMAT_R32_FLOAT, + Vf::Uint32x2 => DXGI_FORMAT_R32G32_UINT, + Vf::Sint32x2 => DXGI_FORMAT_R32G32_SINT, + Vf::Float32x2 => DXGI_FORMAT_R32G32_FLOAT, + Vf::Uint32x3 => DXGI_FORMAT_R32G32B32_UINT, + Vf::Sint32x3 => DXGI_FORMAT_R32G32B32_SINT, + Vf::Float32x3 => DXGI_FORMAT_R32G32B32_FLOAT, + Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT, + Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT, + Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_acomposite_alpha_mode(mode: crate::CompositeAlphaMode) -> dxgi1_2::DXGI_ALPHA_MODE { + use crate::CompositeAlphaMode as Cam; + match mode { + Cam::Opaque => dxgi1_2::DXGI_ALPHA_MODE_IGNORE, + Cam::PreMultiplied => dxgi1_2::DXGI_ALPHA_MODE_PREMULTIPLIED, + Cam::PostMultiplied => dxgi1_2::DXGI_ALPHA_MODE_STRAIGHT, + } +} + +pub fn map_buffer_usage_to_resource_flags(usage: crate::BufferUses) -> d3d12::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + if usage.contains(crate::BufferUses::STORAGE_STORE) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + flags +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12::D3D12_RESOURCE_DIMENSION { + match dim { + wgt::TextureDimension::D1 => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + } +} + +pub fn map_texture_usage_to_resource_flags( + usage: crate::TextureUses, +) -> d3d12::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | 
crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) { + flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + } + if usage.contains(crate::TextureUses::STORAGE_STORE) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + flags +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12::D3D12_TEXTURE_ADDRESS_MODE { + use wgt::AddressMode as Am; + match mode { + Am::Repeat => d3d12::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => d3d12::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => d3d12::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => d3d12::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => d3d12::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + } +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12::D3D12_FILTER_TYPE { + match mode { + wgt::FilterMode::Nearest => d3d12::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => d3d12::D3D12_FILTER_TYPE_LINEAR, + } +} + +pub fn map_comparison(func: wgt::CompareFunction) -> d3d12::D3D12_COMPARISON_FUNC { + use wgt::CompareFunction as Cf; + match func { + Cf::Never => d3d12::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => d3d12::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => d3d12::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => d3d12::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => d3d12::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => d3d12::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => d3d12::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => d3d12::D3D12_COMPARISON_FUNC_ALWAYS, + } +} + +pub fn map_border_color(border_color: Option) -> [f32; 4] { + use wgt::SamplerBorderColor as Sbc; + match border_color { + Some(Sbc::TransparentBlack) | None => [0.0; 4], + Some(Sbc::OpaqueBlack) => [0.0, 0.0, 0.0, 1.0], + Some(Sbc::OpaqueWhite) => [1.0; 4], + } +} + +pub fn map_visibility(visibility: wgt::ShaderStages) -> native::ShaderVisibility { + match visibility { + wgt::ShaderStages::VERTEX => native::ShaderVisibility::VS, + wgt::ShaderStages::FRAGMENT => native::ShaderVisibility::PS, + _ => native::ShaderVisibility::All, + } +} + +pub fn map_binding_type(ty: &wgt::BindingType) -> native::DescriptorRangeType { + use wgt::BindingType as Bt; + match *ty { + Bt::Sampler { .. } => native::DescriptorRangeType::Sampler, + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => native::DescriptorRangeType::CBV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: true }, + .. + } + | Bt::Texture { .. } + | Bt::StorageTexture { + access: wgt::StorageTextureAccess::ReadOnly, + .. + } => native::DescriptorRangeType::SRV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: false }, + .. + } + | Bt::StorageTexture { .. 
} => native::DescriptorRangeType::UAV, + } +} + +pub fn map_label(name: &str) -> Vec { + name.encode_utf16().chain(iter::once(0)).collect() +} + +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12::D3D12_RESOURCE_STATES { + use crate::BufferUses as Bu; + let mut state = d3d12::D3D12_RESOURCE_STATE_COMMON; + + if usage.intersects(Bu::COPY_SRC) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Bu::COPY_DST) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Bu::INDEX) { + state |= d3d12::D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { + state |= d3d12::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if usage.intersects(Bu::STORAGE_LOAD) { + state |= d3d12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Bu::STORAGE_STORE) { + state |= d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + if usage.intersects(Bu::INDIRECT) { + state |= d3d12::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + state +} + +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12::D3D12_RESOURCE_STATES { + use crate::TextureUses as Tu; + let mut state = d3d12::D3D12_RESOURCE_STATE_COMMON; + //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here + //Note: `PRESENT` is the same as `COMMON` + if usage == crate::TextureUses::UNINITIALIZED { + return state; + } + + if usage.intersects(Tu::COPY_SRC) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Tu::COPY_DST) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Tu::SAMPLED | Tu::STORAGE_LOAD) { + state |= d3d12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Tu::COLOR_TARGET) { + state |= d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET; + } + if usage.intersects(Tu::DEPTH_STENCIL_READ) { + state |= d3d12::D3D12_RESOURCE_STATE_DEPTH_READ; + } + if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { + state |= d3d12::D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + if usage.intersects(Tu::STORAGE_STORE) { + state |= d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + state +} + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Point => { + log::error!("Point rasterization is not supported"); + d3d12::D3D12_FILL_MODE_WIREFRAME + } + wgt::PolygonMode::Line => d3d12::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => d3d12::D3D12_FILL_MODE_SOLID, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) 
-> d3d12::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12::D3D12_BLEND_ZERO, + Bf::One => d3d12::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12::D3D12_BLEND_SRC_ALPHA_SAT, + //Bf::Src1Color if is_alpha => d3d12::D3D12_BLEND_SRC1_ALPHA, + //Bf::Src1Color => d3d12::D3D12_BLEND_SRC1_COLOR, + //Bf::OneMinusSrc1Color if is_alpha => d3d12::D3D12_BLEND_INV_SRC1_ALPHA, + //Bf::OneMinusSrc1Color => d3d12::D3D12_BLEND_INV_SRC1_COLOR, + //Bf::Src1Alpha => d3d12::D3D12_BLEND_SRC1_ALPHA, + //Bf::OneMinusSrc1Alpha => d3d12::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12::D3D12_BLEND_OP, + d3d12::D3D12_BLEND, + d3d12::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => d3d12::D3D12_BLEND_OP_MAX, + }; + let raw_src = map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: &[wgt::ColorTargetState], +) -> [d3d12::D3D12_RENDER_TARGET_BLEND_DESC; d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] +{ + let dummy_target = d3d12::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12::D3D12_BLEND_ZERO, + DestBlend: d3d12::D3D12_BLEND_ZERO, + BlendOp: d3d12::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12::D3D12_BLEND_OP_ADD, + LogicOp: d3d12::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12::D3D12_STENCIL_OP_ZERO, + So::Replace => 
d3d12::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => d3d12::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12::D3D12_DEPTH_STENCILOP_DESC { + d3d12::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12::D3D12_DEPTH_STENCIL_DESC { + d3d12::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: if ds.is_depth_enabled() { 1 } else { 0 }, + DepthWriteMask: if ds.depth_write_enabled { + d3d12::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: if ds.stencil.is_enabled() { 1 } else { 0 }, + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs new file mode 100644 index 0000000000..da8ce6786b --- /dev/null +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -0,0 +1,306 @@ +use super::HResult as _; +use bit_set::BitSet; +use parking_lot::Mutex; +use range_alloc::RangeAllocator; +use std::fmt; + +const HEAP_SIZE_FIXED: usize = 64; + +#[derive(Copy, Clone)] +pub(super) struct DualHandle { + cpu: native::CpuDescriptor, + pub gpu: native::GpuDescriptor, + /// How large the block allocated to this handle is. 
+ count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } +} + +type DescriptorIndex = u64; + +pub(super) struct GeneralHeap { + pub raw: native::DescriptorHeap, + ty: native::DescriptorHeapType, + handle_size: u64, + total_handles: u64, + start: DualHandle, + ranges: Mutex>, +} + +impl GeneralHeap { + pub(super) fn new( + device: native::Device, + ty: native::DescriptorHeapType, + total_handles: u64, + ) -> Result { + let raw = device + .create_descriptor_heap( + total_handles as u32, + ty, + native::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ) + .into_device_result("Descriptor heap creation")?; + + Ok(Self { + raw, + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, + total_handles, + start: DualHandle { + cpu: raw.start_cpu_descriptor(), + gpu: raw.start_gpu_descriptor(), + count: 0, + }, + ranges: Mutex::new(RangeAllocator::new(0..total_handles)), + }) + } + + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + count, + } + } + + fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { + native::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { + native::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } + + pub(super) fn allocate_slice(&self, count: u64) -> Result { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `DescriptorHeapSlice`. + /// Do not use this with handles not given out by this `DescriptorHeapSlice`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } +} + +/// Fixed-size free-list allocator for CPU descriptors. +struct FixedSizeHeap { + raw: native::DescriptorHeap, + /// Bit flag representation of available handles in the heap. + /// + /// 0 - Occupied + /// 1 - free + availability: u64, + handle_size: usize, + start: native::CpuDescriptor, +} + +impl FixedSizeHeap { + fn new(device: native::Device, ty: native::DescriptorHeapType) -> Self { + let (heap, _hr) = device.create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + native::DescriptorHeapFlags::empty(), + 0, + ); + + Self { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + fn alloc_handle(&mut self) -> native::CpuDescriptor { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + assert!(slot < HEAP_SIZE_FIXED); + // Set the slot as occupied. 
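`FixedSizeHeap` tracks its 64 descriptor slots in a single availability word: `trailing_zeros` finds the first free slot, and an XOR toggles it between free and occupied (and back again on free). A standalone sketch of the same free-list trick, fixed at 64 slots like `HEAP_SIZE_FIXED`:

```rust
/// 64-slot allocator: bit i == 1 means slot i is free.
struct SlotMask {
    availability: u64,
}

impl SlotMask {
    fn new() -> Self {
        Self { availability: !0 } // all free
    }

    fn alloc(&mut self) -> Option<usize> {
        if self.availability == 0 {
            return None; // heap is full
        }
        let slot = self.availability.trailing_zeros() as usize;
        self.availability ^= 1 << slot; // mark occupied
        Some(slot)
    }

    fn free(&mut self, slot: usize) {
        debug_assert_eq!(self.availability & (1 << slot), 0, "slot was not occupied");
        self.availability ^= 1 << slot; // mark free again
    }
}

fn main() {
    let mut mask = SlotMask::new();
    let slot = mask.alloc().unwrap();
    assert_eq!(slot, 0);
    mask.free(slot);
}
```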
+ self.availability ^= 1 << slot; + + native::CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + } + } + + fn free_handle(&mut self, handle: native::CpuDescriptor) { + let slot = (handle.ptr - self.start.ptr) / self.handle_size; + assert!(slot < HEAP_SIZE_FIXED); + assert_eq!(self.availability & (1 << slot), 0); + self.availability ^= 1 << slot; + } + + fn is_full(&self) -> bool { + self.availability == 0 + } + + unsafe fn destroy(&self) { + self.raw.destroy(); + } +} + +#[derive(Clone, Copy)] +pub(super) struct Handle { + pub raw: native::CpuDescriptor, + heap_index: usize, +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Handle") + .field("ptr", &self.raw.ptr) + .field("heap_index", &self.heap_index) + .finish() + } +} + +pub(super) struct CpuPool { + device: native::Device, + ty: native::DescriptorHeapType, + heaps: Vec, + avaliable_heap_indices: BitSet, +} + +impl CpuPool { + pub(super) fn new(device: native::Device, ty: native::DescriptorHeapType) -> Self { + Self { + device, + ty, + heaps: Vec::new(), + avaliable_heap_indices: BitSet::new(), + } + } + + pub(super) fn alloc_handle(&mut self) -> Handle { + let heap_index = self + .avaliable_heap_indices + .iter() + .next() + .unwrap_or_else(|| { + // Allocate a new heap + let id = self.heaps.len(); + self.heaps.push(FixedSizeHeap::new(self.device, self.ty)); + self.avaliable_heap_indices.insert(id); + id + }); + + let heap = &mut self.heaps[heap_index]; + let handle = Handle { + raw: heap.alloc_handle(), + heap_index, + }; + if heap.is_full() { + self.avaliable_heap_indices.remove(heap_index); + } + + handle + } + + pub(super) fn free_handle(&mut self, handle: Handle) { + self.heaps[handle.heap_index].free_handle(handle.raw); + self.avaliable_heap_indices.insert(handle.heap_index); + } + + pub(super) unsafe fn destroy(&self) { + for heap in &self.heaps { + heap.destroy(); + } + } +} + +pub(super) struct CpuHeapInner { + pub raw: native::DescriptorHeap, + pub stage: Vec, +} + +pub(super) struct CpuHeap { + pub inner: Mutex, + start: native::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: native::Device, + ty: native::DescriptorHeapType, + total: u32, + ) -> Result { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, native::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + raw, + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> native::CpuDescriptor { + native::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) unsafe fn destroy(self) { + self.inner.into_inner().raw.destroy(); + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: native::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result { + let count = src.stage.len() as u32; + let index = dst.allocate_slice(count as u64)?; + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + 
src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ); + Ok(dst.at(index, count as u64)) +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs new file mode 100644 index 0000000000..0f5f5a3570 --- /dev/null +++ b/wgpu-hal/src/dx12/device.rs @@ -0,0 +1,1674 @@ +use super::{conv, descriptor, HResult as _}; +use parking_lot::Mutex; +use std::{ffi, mem, num::NonZeroU32, ptr, slice, sync::Arc, thread}; +use winapi::{ + shared::{dxgiformat, dxgitype, winerror}, + um::{d3d12, d3d12sdklayers, d3dcompiler, synchapi, winbase}, + Interface, +}; + +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; +const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; +//TODO: find the exact value +const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12::D3D12_HEAP_FLAG_NONE; + +impl super::Device { + pub(super) fn new( + raw: native::Device, + present_queue: native::CommandQueue, + features: wgt::Features, + private_caps: super::PrivateCapabilities, + library: &Arc, + ) -> Result { + let mut idle_fence = native::Fence::null(); + let hr = unsafe { + raw.CreateFence( + 0, + d3d12::D3D12_FENCE_FLAG_NONE, + &d3d12::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ) + }; + hr.into_device_result("Idle fence creation")?; + + let mut zero_buffer = native::Resource::null(); + unsafe { + let raw_desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12::D3D12_RESOURCE_FLAG_NONE, + }; + + let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { + Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. } => d3d12::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12::D3D12_MEMORY_POOL_L1, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + raw.CreateCommittedResource( + &heap_properties, + d3d12::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + zero_buffer.mut_void(), + ) + .into_device_result("Zero buffer creation")?; + + //Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. 
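This zero-filled buffer is the source that `fill_buffer`, earlier in this patch, copies from in chunks no larger than `ZERO_BUFFER_SIZE`. A sketch of that chunking; the 256 KiB constant is an assumed placeholder for illustration, not the crate's actual value:

```rust
/// Assumed chunk size standing in for `ZERO_BUFFER_SIZE` (illustrative only).
const ZERO_CHUNK: u64 = 256 * 1024;

/// Walk `range` in chunks no larger than the zero buffer, as `fill_buffer` does,
/// returning the (dst_offset, copy_size) pairs that would be issued.
fn zero_copy_chunks(range: std::ops::Range<u64>) -> Vec<(u64, u64)> {
    let mut chunks = Vec::new();
    let mut offset = range.start;
    while offset < range.end {
        let size = ZERO_CHUNK.min(range.end - offset);
        chunks.push((offset, size));
        offset += size;
    }
    chunks
}

fn main() {
    // A 600 KiB clear becomes 256 KiB + 256 KiB + 88 KiB copies.
    let chunks = zero_copy_chunks(0..600 * 1024);
    assert_eq!(chunks.len(), 3);
    assert_eq!(chunks[2], (512 * 1024, 88 * 1024));
}
```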
+ }; + + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let capacity_views = 1_000_000; + let capacity_samplers = 2_048; + + let shared = super::DeviceShared { + features, + zero_buffer, + cmd_signatures: super::CommandSignatures { + draw: raw + .create_command_signature( + native::RootSignature::null(), + &[native::IndirectArgument::draw()], + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: raw + .create_command_signature( + native::RootSignature::null(), + &[native::IndirectArgument::draw_indexed()], + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: raw + .create_command_signature( + native::RootSignature::null(), + &[native::IndirectArgument::dispatch()], + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }, + heap_views: descriptor::GeneralHeap::new( + raw, + native::DescriptorHeapType::CbvSrvUav, + capacity_views, + )?, + heap_samplers: descriptor::GeneralHeap::new( + raw, + native::DescriptorHeapType::Sampler, + capacity_samplers, + )?, + }; + + Ok(super::Device { + raw, + present_queue, + idler: super::Idler { + fence: idle_fence, + event: native::Event::create(false, false), + }, + private_caps, + shared: Arc::new(shared), + rtv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::Rtv, + )), + dsv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::Dsv, + )), + srv_uav_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::CbvSrvUav, + )), + sampler_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::Sampler, + )), + library: Arc::clone(library), + }) + } + + pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(crate::DeviceError::Lost); + } + + let value = cur_value + 1; + log::info!("Waiting for idle with value {}", value); + self.present_queue.signal(self.idler.fence, value); + let hr = self + .idler + .fence + .set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE); + Ok(()) + } + + unsafe fn view_texture_as_shader_resource( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: conv::map_texture_format_nodepth(desc.format), + ViewDimension: 0, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + u: mem::zeroed(), + }; + + #[allow(non_snake_case)] + let MipLevels = match desc.range.mip_level_count { + Some(count) => count.get(), + None => !0, + }; + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + 
FirstArraySlice: desc.range.base_array_layer, + ArraySize, + ResourceMinLODClamp: 0.0, + } + }*/ + wgt::TextureViewDimension::D2 if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMS; + *raw_desc.u.Texture2DMS_mut() = d3d12::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::D2Array if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::D3 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE3D; + *raw_desc.u.Texture3D_mut() = d3d12::D3D12_TEX3D_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::Cube => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURECUBE; + *raw_desc.u.TextureCube_mut() = d3d12::D3D12_TEXCUBE_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::CubeArray => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + *raw_desc.u.TextureCubeArray_mut() = d3d12::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + First2DArrayFace: desc.range.base_array_layer, + NumCubes: array_size / 6, + ResourceMinLODClamp: 0.0, + } + } + } + + let handle = self.srv_uav_pool.lock().alloc_handle(); + self.raw + .CreateShaderResourceView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); + handle + } + + unsafe fn view_texture_as_unoredered_access( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: conv::map_texture_format_nodepth(desc.format), + ViewDimension: 0, + u: mem::zeroed(), + }; + + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_UAV { + MipSlice: desc.range.base_mip_level, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_UAV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_UAV { + MipSlice: desc.range.base_mip_level, + PlaneSlice: 0, + } + } + 
wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_UAV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D3 => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE3D; + *raw_desc.u.Texture3D_mut() = d3d12::D3D12_TEX3D_UAV { + MipSlice: desc.range.base_mip_level, + FirstWSlice: desc.range.base_array_layer, + WSize: array_size, + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube UAV") + } + } + + let handle = self.srv_uav_pool.lock().alloc_handle(); + self.raw.CreateUnorderedAccessView( + texture.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle.raw, + ); + handle + } + + unsafe fn view_texture_as_render_target( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC { + Format: conv::map_texture_format(desc.format), + ViewDimension: 0, + u: mem::zeroed(), + }; + + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_RTV { + MipSlice: desc.range.base_mip_level, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_RTV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DMS; + *raw_desc.u.Texture2DMS_mut() = d3d12::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_RTV { + MipSlice: desc.range.base_mip_level, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D2Array if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_RTV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D3 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE3D; + *raw_desc.u.Texture3D_mut() = d3d12::D3D12_TEX3D_RTV { + MipSlice: desc.range.base_mip_level, + FirstWSlice: desc.range.base_array_layer, + WSize: array_size, + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube RTV") + } + } + + let handle = self.rtv_pool.lock().alloc_handle(); + self.raw + .CreateRenderTargetView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); + handle + } + + 
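Each of these view helpers resolves the optional mip and array-layer counts the same way: `Some(n)` means exactly `n`, and `None` means everything from the base up to the end of the resource. A minimal sketch of that resolution, mirroring the `match` used above:

```rust
use std::num::NonZeroU32;

/// `None` means "from `base` to the end of the resource".
fn resolve_count(requested: Option<NonZeroU32>, base: u32, total: u32) -> u32 {
    match requested {
        Some(count) => count.get(),
        None => total - base,
    }
}

fn main() {
    // A 6-layer texture viewed from layer 2 with no explicit count covers 4 layers.
    assert_eq!(resolve_count(None, 2, 6), 4);
    assert_eq!(resolve_count(NonZeroU32::new(3), 2, 6), 3);
}
```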
unsafe fn view_texture_as_depth_stencil( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + read_only: bool, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: conv::map_texture_format(desc.format), + ViewDimension: 0, + Flags: if read_only { + let aspects = crate::FormatAspects::from(desc.format); + let mut flags = 0; + if aspects.contains(crate::FormatAspects::DEPTH) { + flags |= d3d12::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + flags |= d3d12::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + flags + } else { + d3d12::D3D12_DSV_FLAG_NONE + }, + u: mem::zeroed(), + }; + + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_DSV { + MipSlice: desc.range.base_mip_level, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_DSV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DMS; + *raw_desc.u.Texture2DMS_mut() = d3d12::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_DSV { + MipSlice: desc.range.base_mip_level, + } + } + wgt::TextureViewDimension::D2Array if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_DSV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D3 + | wgt::TextureViewDimension::Cube + | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube or 3D RTV") + } + } + + let handle = self.dsv_pool.lock().alloc_handle(); + self.raw + .CreateDepthStencilView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); + handle + } + + fn load_shader( + &self, + stage: &crate::ProgrammableStage, + layout: &super::PipelineLayout, + naga_stage: naga::ShaderStage, + ) -> Result { + use naga::back::hlsl; + + let stage_bit = crate::util::map_naga_stage(naga_stage); + let module = &stage.module.naga.module; + //TODO: reuse the writer + let mut source = String::new(); + let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let _reflection_info = writer + .write(module, &stage.module.naga.info) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {:?}", e)))?; + + let full_stage = format!( + "{}_{}\0", + naga_stage.to_hlsl_str(), + layout.naga_options.shader_model.to_str() + ); + let raw_ep = ffi::CString::new(stage.entry_point).unwrap(); + + let mut shader_data = 
native::Blob::null(); + let mut error = native::Blob::null(); + let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + if self.private_caps.shader_debug_info { + compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG; + } + if self + .shared + .features + .contains(wgt::Features::UNSIZED_BINDING_ARRAY) + { + compile_flags |= d3dcompiler::D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES; + } + + let hr = unsafe { + d3dcompiler::D3DCompile( + source.as_ptr() as *const _, + source.len(), + ptr::null(), + ptr::null(), + ptr::null_mut(), + raw_ep.as_ptr(), + full_stage.as_ptr() as *const i8, + compile_flags, + 0, + shader_data.mut_void() as *mut *mut _, + error.mut_void() as *mut *mut _, + ) + }; + + match hr.into_result() { + Ok(()) => Ok(shader_data), + Err(e) => { + log::warn!("Naga generated shader:\n{}", source); + let message = unsafe { + let slice = slice::from_raw_parts( + error.GetBufferPointer() as *const u8, + error.GetBufferSize(), + ); + String::from_utf8_lossy(slice) + }; + let full_msg = format!("D3DCompile error ({}): {}", e, message); + unsafe { + error.destroy(); + } + Err(crate::PipelineError::Linkage(stage_bit, full_msg)) + } + } + } +} + +impl crate::Device for super::Device { + unsafe fn exit(self) { + self.rtv_pool.into_inner().destroy(); + self.dsv_pool.into_inner().destroy(); + self.srv_uav_pool.into_inner().destroy(); + self.sampler_pool.into_inner().destroy(); + self.shared.destroy(); + self.idler.destroy(); + + // Debug tracking alive objects + if !thread::panicking() { + if let Ok(debug_device) = self + .raw + .cast::() + .into_result() + { + debug_device.ReportLiveDeviceObjects( + d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL, + ); + debug_device.destroy(); + } + } + + self.raw.destroy(); + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result { + let mut resource = native::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } + + let raw_desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_usage_to_resource_flags(desc.usage), + }; + + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { + Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_read { + d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else { + d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE + }, + MemoryPoolPreference: match self.private_caps.memory_architecture { + super::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + d3d12::D3D12_MEMORY_POOL_L1 + } + _ => d3d12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = self.raw.CreateCommittedResource( + &heap_properties, + if self.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + 
&d3d12::ID3D12Resource::uuidof(), + resource.mut_void(), + ); + + hr.into_device_result("Buffer creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + resource.SetName(cwstr.as_ptr()); + } + + Ok(super::Buffer { resource, size }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + buffer.resource.destroy(); + } + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result { + let mut ptr = ptr::null_mut(); + let hr = (*buffer.resource).Map(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }, &mut ptr); + hr.into_device_result("Map buffer")?; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(ptr.offset(range.start as isize) as *mut _).unwrap(), + //TODO: double-check this. Documentation is a bit misleading - + // it implies that Map/Unmap is needed to invalidate/flush memory. + is_coherent: true, + }) + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + (*buffer.resource).Unmap(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }); + Ok(()) + } + unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result { + let mut resource = native::Resource::null(); + + let raw_desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: conv::map_texture_dimension(desc.dimension), + Alignment: 0, + Width: desc.size.width as u64, + Height: desc.size.height, + DepthOrArraySize: desc.size.depth_or_array_layers as u16, + MipLevels: desc.mip_level_count as u16, + //TODO: map to surface format to allow view casting + Format: conv::map_texture_format(desc.format), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.sample_count, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE, + Flags: conv::map_texture_usage_to_resource_flags(desc.usage), + }; + + let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { + Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match self.private_caps.memory_architecture { + super::MemoryArchitecture::NonUnified => d3d12::D3D12_MEMORY_POOL_L1, + super::MemoryArchitecture::Unified { .. 
} => d3d12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = self.raw.CreateCommittedResource( + &heap_properties, + if self.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12::ID3D12Resource::uuidof(), + resource.mut_void(), + ); + + hr.into_device_result("Texture creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + resource.SetName(cwstr.as_ptr()); + } + + Ok(super::Texture { + resource, + format: desc.format, + dimension: desc.dimension, + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + texture.resource.destroy(); + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result { + Ok(super::TextureView { + //Note: this mapping also happens in all of the `view_texture_as_*` + raw_format: conv::map_texture_format(desc.format), + target_base: ( + texture.resource, + texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), + ), + handle_srv: if desc + .usage + .intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) + { + Some(self.view_texture_as_shader_resource(texture, desc)) + } else { + None + }, + handle_uav: if desc.usage.intersects(crate::TextureUses::STORAGE_STORE) { + Some(self.view_texture_as_unoredered_access(texture, desc)) + } else { + None + }, + handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + Some(self.view_texture_as_render_target(texture, desc)) + } else { + None + }, + handle_dsv_ro: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_READ) + { + Some(self.view_texture_as_depth_stencil(texture, desc, true)) + } else { + None + }, + handle_dsv_rw: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) + { + Some(self.view_texture_as_depth_stencil(texture, desc, false)) + } else { + None + }, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if view.handle_srv.is_some() || view.handle_uav.is_some() { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } + } + if let Some(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if view.handle_dsv_ro.is_some() || view.handle_dsv_rw.is_some() { + let mut pool = self.dsv_pool.lock(); + if let Some(handle) = view.handle_dsv_ro { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_dsv_rw { + pool.free_handle(handle); + } + } + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result { + let handle = self.sampler_pool.lock().alloc_handle(); + + let reduction = match desc.compare { + Some(_) => d3d12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + let filter = conv::map_filter_mode(desc.min_filter) << d3d12::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter) << d3d12::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter) << d3d12::D3D12_MIP_FILTER_SHIFT + | reduction << d3d12::D3D12_FILTER_REDUCTION_TYPE_SHIFT + | desc + .anisotropy_clamp + .map_or(0, |_| d3d12::D3D12_FILTER_ANISOTROPIC); + + 
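        // D3D12_FILTER packs the mip, mag and min filter types at bit shifts
        // 0, 2 and 4, and the reduction type at shift 7. Assuming
        // conv::map_filter_mode returns D3D12_FILTER_TYPE_POINT (0) for
        // Nearest and D3D12_FILTER_TYPE_LINEAR (1) for Linear, a trilinear
        // sampler works out to
        //     (1 << 4) | (1 << 2) | (1 << 0) = 0x15 = D3D12_FILTER_MIN_MAG_MIP_LINEAR,
        // and any anisotropy_clamp ORs in D3D12_FILTER_ANISOTROPIC (0x55),
        // which supersedes the per-stage bits.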
self.raw.create_sampler( + handle.raw, + filter, + [ + conv::map_address_mode(desc.address_modes[0]), + conv::map_address_mode(desc.address_modes[1]), + conv::map_address_mode(desc.address_modes[2]), + ], + 0.0, + desc.anisotropy_clamp.map_or(0, |aniso| aniso.get() as u32), + conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), + conv::map_border_color(desc.border_color), + desc.lod_clamp.clone().unwrap_or(0.0..16.0), + ); + + Ok(super::Sampler { handle }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + self.sampler_pool.lock().free_handle(sampler.handle); + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor, + ) -> Result { + let allocator = self + .raw + .create_command_allocator(native::CmdListType::Direct) + .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + allocator.SetName(cwstr.as_ptr()); + } + + Ok(super::CommandEncoder { + allocator, + device: self.raw, + shared: Arc::clone(&self.shared), + list: None, + free_lists: Vec::new(), + pass: super::PassState::new(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { + if let Some(list) = encoder.list { + list.close(); + list.destroy(); + } + for list in encoder.free_lists { + list.destroy(); + } + encoder.allocator.destroy(); + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += 1, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += 1 + } + wgt::BindingType::Sampler { .. } => num_samplers += 1, + } + } + + let num_views = num_buffer_views + num_texture_views; + Ok(super::BindGroupLayout { + entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + native::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + native::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + if let Some(cpu_heap) = bg_layout.cpu_heap_views { + cpu_heap.destroy(); + } + if let Some(cpu_heap) = bg_layout.cpu_heap_samplers { + cpu_heap.destroy(); + } + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor, + ) -> Result { + // Pipeline layouts are implemented as RootSignature for D3D12. + // + // Push Constants are implemented as root constants. + // + // Each descriptor set layout will be one table entry of the root signature. + // We have the additional restriction that SRV/CBV/UAV and samplers need to be + // separated, so each set layout will actually occupy up to 2 entries! + // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, + // and finally dynamic uniform descriptors. + // + // Dynamic uniform buffers are implemented as root descriptors. 
+ // This allows to handle the dynamic offsets properly, which would not be feasible + // with a combination of root constant and descriptor table. + // + // Root signature layout: + // Root Constants: Register: Offest/4, Space: 0 + // ... + // DescriptorTable0: Space: 1 (SrvCbvUav) + // DescriptorTable0: Space: 1 (Sampler) + // Root Descriptors 0 + // DescriptorTable1: Space: 2 (SrvCbvUav) + // Root Descriptors 1 + // ... + + //TODO: reverse the order, according to this advice in + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + //> Furthermore, applications should generally sort the layout + //> of the root arguments in decreasing order of change frequency. + //> This way if some implementations need to switch to a different + //> memory storage scheme to version parts of a heavily populated + //> root arguments, the data that is changing at the highest frequency + //> (near the start of the root arguments) is most likely to run + //> as efficiently as possible. + + let root_constants: &[()] = &[]; + + // Number of elements in the root signature. + let total_parameters = root_constants.len() + desc.bind_group_layouts.len() * 2; + // Guarantees that no re-allocation is done, and our pointers are valid + let mut parameters = Vec::with_capacity(total_parameters); + + let root_space_offset = if !root_constants.is_empty() { 1 } else { 0 }; + // Collect the whole number of bindings we will create upfront. + // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total_non_dynamic_entries = desc + .bind_group_layouts + .iter() + .flat_map(|bgl| { + bgl.entries.iter().map(|entry| match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => 0, + _ => 1, + }) + }) + .sum(); + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); + + let mut bind_group_infos = + arrayvec::ArrayVec::::default(); + for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { + let space = root_space_offset + index as u32; + let mut info = super::BindGroupInfo { + tables: super::TableTypes::empty(), + base_root_index: parameters.len() as u32, + dynamic_buffers: Vec::new(), + }; + + let mut visibility_view_static = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_sampler = wgt::ShaderStages::empty(); + for entry in bgl.entries.iter() { + match entry.ty { + wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => visibility_view_dynamic |= entry.visibility, + _ => visibility_view_static |= entry.visibility, + } + } + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } + | wgt::BindingType::Sampler { .. 
} => continue, + ref other => conv::map_binding_type(other), + }; + ranges.push(native::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native::Binding { + register: entry.binding, + space, + }, + d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + } + if ranges.len() > range_base { + parameters.push(native::RootParameter::descriptor_table( + conv::map_visibility(visibility_view_static), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SRV_CBV_UAV; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Sampler { .. } => native::DescriptorRangeType::Sampler, + _ => continue, + }; + ranges.push(native::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native::Binding { + register: entry.binding, + space, + }, + d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + } + if ranges.len() > range_base { + parameters.push(native::RootParameter::descriptor_table( + conv::map_visibility(visibility_sampler), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SAMPLERS; + } + + // Root (dynamic) descriptor tables + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + for entry in bgl.entries.iter() { + let buffer_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + ty, + .. + } => ty, + _ => continue, + }; + let binding = native::Binding { + register: entry.binding, + space, + }; + let (kind, param) = match buffer_ty { + wgt::BufferBindingType::Uniform => ( + super::BufferViewKind::Constant, + native::RootParameter::cbv_descriptor(dynamic_buffers_visibility, binding), + ), + wgt::BufferBindingType::Storage { read_only: true } => ( + super::BufferViewKind::ShaderResource, + native::RootParameter::srv_descriptor(dynamic_buffers_visibility, binding), + ), + wgt::BufferBindingType::Storage { read_only: false } => ( + super::BufferViewKind::UnorderedAccess, + native::RootParameter::uav_descriptor(dynamic_buffers_visibility, binding), + ), + }; + info.dynamic_buffers.push(kind); + parameters.push(param); + } + + bind_group_infos.push(info); + } + + // Ensure that we didn't reallocate! + debug_assert_eq!(ranges.len(), total_non_dynamic_entries); + + let (blob, error) = self + .library + .serialize_root_signature( + native::RootSignatureVersion::V1_0, + ¶meters, + &[], + native::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) + .map_err(|e| { + log::error!("Unable to find serialization function: {:?}", e); + crate::DeviceError::Lost + })? 
+ .into_device_result("Root signature serialization")?; + + if !error.is_null() { + log::error!( + "Root signature serialization error: {:?}", + error.as_c_str().to_str().unwrap() + ); + error.destroy(); + return Err(crate::DeviceError::Lost); + } + + let raw = self + .raw + .create_root_signature(blob, 0) + .into_device_result("Root signature creation")?; + blob.destroy(); + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::PipelineLayout { + raw, + bind_group_infos, + naga_options: naga::back::hlsl::Options { + shader_model: naga::back::hlsl::ShaderModel::V5_1, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + pipeline_layout.raw.destroy(); + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor, + ) -> Result { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => { + let data = &desc.buffers[entry.resource_index as usize]; + dynamic_buffers.push(data.resolve_address()); + } + wgt::BindingType::Buffer { ty, .. } => { + let data = &desc.buffers[entry.resource_index as usize]; + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + wgt::BufferBindingType::Uniform => { + let size_mask = + d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: ((size - 1) | size_mask) + 1, + }; + self.raw.CreateConstantBufferView(&raw_desc, handle); + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV { + FirstElement: data.offset, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW, + }; + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ); + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV { + FirstElement: data.offset, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW, + }; + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle, + ); + } + } + inner.stage.push(handle); + } + wgt::BindingType::Texture { .. 
} + | wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::ReadOnly, + .. + } => { + let data = &desc.textures[entry.resource_index as usize]; + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + wgt::BindingType::StorageTexture { .. } => { + let data = &desc.textures[entry.resource_index as usize]; + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + wgt::BindingType::Sampler { .. } => { + let data = &desc.samplers[entry.resource_index as usize]; + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = descriptor::upload( + self.raw, + &*inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + )?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = descriptor::upload( + self.raw, + &*inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + )?; + Some(dual) + } + None => None, + }; + + Ok(super::BindGroup { + handle_views, + handle_samplers, + dynamic_buffers, + }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + let _ = self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + let _ = self.shared.heap_samplers.free_slice(dual); + } + } + + unsafe fn create_shader_module( + &self, + _desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result { + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + // just drop + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor, + ) -> Result { + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)? 
+ } + None => native::Blob::null(), + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::InputStepMode::Vertex => { + (d3d12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::InputStepMode::Instance => { + (d3d12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + input_element_descs.push(d3d12::D3D12_INPUT_ELEMENT_DESC { + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + SemanticIndex: attribute.shader_location, + Format: conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + *rtv_format = conv::map_texture_format(ct.format); + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(); + + let raw_rasterizer = d3d12::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: if desc.primitive.clamp_depth { 0 } else { 1 }, + MultisampleEnable: if desc.multisample.count > 1 { 1 } else { 0 }, + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.raw.as_mut_ptr(), + VS: *native::Shader::from_blob(blob_vs), + PS: if blob_fs.is_null() { + *native::Shader::null() + } else { + *native::Shader::from_blob(blob_fs) + }, + GS: *native::Shader::null(), + DS: *native::Shader::null(), + HS: *native::Shader::null(), + StreamOutput: d3d12::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: if desc.multisample.alpha_to_coverage_enabled { + 1 + } else { + 0 + }, + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => mem::zeroed(), + }, + InputLayout: d3d12::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, 
+ Some(wgt::IndexFormat::Uint32) => { + d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = native::PipelineState::null(); + let hr = self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ); + + blob_vs.destroy(); + if !blob_fs.is_null() { + blob_fs.destroy(); + } + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::RenderPipeline { + raw, + signature: desc.layout.raw, + topology, + vertex_strides, + }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + pipeline.raw.destroy(); + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor, + ) -> Result { + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + + let pair = self.raw.create_compute_pipeline_state( + desc.layout.raw, + native::Shader::from_blob(blob_cs), + 0, + native::CachedPSO::null(), + native::PipelineStateFlags::empty(), + ); + + blob_cs.destroy(); + + let raw = pair.into_result().map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + })?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::ComputePipeline { + raw, + signature: desc.layout.raw, + }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + pipeline.raw.destroy(); + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor, + ) -> Result { + let (heap_ty, raw_ty) = match desc.ty { + wgt::QueryType::Occlusion => ( + native::QueryHeapType::Occlusion, + d3d12::D3D12_QUERY_TYPE_BINARY_OCCLUSION, + ), + wgt::QueryType::PipelineStatistics(_) => ( + native::QueryHeapType::PipelineStatistics, + d3d12::D3D12_QUERY_TYPE_TIMESTAMP, + ), + wgt::QueryType::Timestamp => ( + native::QueryHeapType::Timestamp, + d3d12::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, + ), + }; + + let raw = self + .raw + .create_query_heap(heap_ty, desc.count, 0) + .into_device_result("Query heap creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::QuerySet { raw, raw_ty }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + set.raw.destroy(); + } + + unsafe fn create_fence(&self) -> Result { + let mut raw = native::Fence::null(); + let hr = self.raw.CreateFence( + 0, + d3d12::D3D12_FENCE_FLAG_NONE, + &d3d12::ID3D12Fence::uuidof(), + raw.mut_void(), + ); + hr.into_device_result("Fence creation")?; + Ok(super::Fence { raw }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + fence.raw.destroy(); + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result { + 
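        // ID3D12Fence::GetCompletedValue returns the last value the GPU has
        // signaled on this fence, which maps directly onto crate::FenceValue;
        // `wait` below combines it with SetEventOnCompletion plus
        // WaitForSingleObject for blocking waits.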
Ok(fence.raw.GetCompletedValue()) + } + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result { + if fence.raw.GetCompletedValue() >= value { + return Ok(true); + } + let hr = fence.raw.set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + + match synchapi::WaitForSingleObject(self.idler.event.0, timeout_ms) { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::DeviceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::DeviceError::Lost) + } + } + } + + unsafe fn start_capture(&self) -> bool { + false + } + unsafe fn stop_capture(&self) {} +} diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs new file mode 100644 index 0000000000..1e40a45a86 --- /dev/null +++ b/wgpu-hal/src/dx12/instance.rs @@ -0,0 +1,226 @@ +use super::HResult as _; +use std::{borrow::Cow, slice, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_6, winerror}, + um::{errhandlingapi, winnt}, + vc::excpt, + Interface, +}; + +const MESSAGE_PREFIXES: &[(&str, log::Level)] = &[ + ("CORRUPTION", log::Level::Error), + ("ERROR", log::Level::Error), + ("WARNING", log::Level::Warn), + ("INFO", log::Level::Info), + ("MESSAGE", log::Level::Debug), +]; + +unsafe extern "system" fn output_debug_string_handler( + exception_info: *mut winnt::EXCEPTION_POINTERS, +) -> i32 { + // See https://stackoverflow.com/a/41480827 + let record = &*(*exception_info).ExceptionRecord; + if record.NumberParameters != 2 { + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + let message = match record.ExceptionCode { + winnt::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(slice::from_raw_parts( + record.ExceptionInformation[1] as *const u8, + record.ExceptionInformation[0], + )), + winnt::DBG_PRINTEXCEPTION_WIDE_C => { + Cow::Owned(String::from_utf16_lossy(slice::from_raw_parts( + record.ExceptionInformation[1] as *const u16, + record.ExceptionInformation[0], + ))) + } + _ => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let (message, level) = match message.strip_prefix("D3D12 ") { + Some(msg) => { + match MESSAGE_PREFIXES + .iter() + .find(|&&(prefix, _)| msg.starts_with(prefix)) + { + Some(&(prefix, level)) => (&msg[prefix.len() + 2..], level), + None => (msg, log::Level::Debug), + } + } + None => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + log::log!(level, "{}", message.trim_end_matches("\n\0")); + + if cfg!(debug_assertions) && level == log::Level::Error { + std::process::exit(1); + } + + excpt::EXCEPTION_CONTINUE_EXECUTION +} + +impl Drop for super::Instance { + fn drop(&mut self) { + unsafe { + self.factory.destroy(); + errhandlingapi::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _); + } + } +} + +impl crate::Instance for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { + let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; + + let lib_dxgi = native::DxgiLib::new().map_err(|_| crate::InstanceError)?; + let mut factory_flags = native::FactoryCreationFlags::empty(); + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + debug_controller.enable_layer(); + debug_controller.Release(); + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + 
Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. + match lib_dxgi.get_debug_interface1() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + debug_controller.destroy(); + factory_flags |= native::FactoryCreationFlags::DEBUG; + } + Err(err) => { + log::warn!("Unable to enable DXGI debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for DXGI not found: {:?}", err); + } + } + + // Intercept `OutputDebugString` calls + errhandlingapi::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)); + } + + // Create DXGI factory + let factory = match lib_dxgi.create_factory2(factory_flags) { + Ok(pair) => match pair.into_result() { + Ok(factory) => factory, + Err(err) => { + log::warn!("Failed to create DXGI factory: {}", err); + return Err(crate::InstanceError); + } + }, + Err(err) => { + log::warn!("Factory creation function for DXGI not found: {:?}", err); + return Err(crate::InstanceError); + } + }; + + Ok(Self { + factory, + library: Arc::new(lib_main), + _lib_dxgi: lib_dxgi, + flags: desc.flags, + }) + } + + unsafe fn create_surface( + &self, + has_handle: &impl raw_window_handle::HasRawWindowHandle, + ) -> Result { + match has_handle.raw_window_handle() { + raw_window_handle::RawWindowHandle::Windows(handle) => Ok(super::Surface { + factory: self.factory, + wnd_handle: handle.hwnd as *mut _, + swap_chain: None, + }), + _ => Err(crate::InstanceError), + } + } + unsafe fn destroy_surface(&self, _surface: super::Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec> { + // Try to use high performance order by default (returns None on Windows < 1803) + let factory6 = match self.factory.cast::().into_result() { + Ok(f6) => { + // It's okay to decrement the refcount here because we + // have another reference to the factory already owned by `self`. + f6.destroy(); + Some(f6) + } + Err(err) => { + log::info!("Failed to cast DXGI to 1.6: {}", err); + None + } + }; + + // Enumerate adapters + let mut adapters = Vec::new(); + for cur_index in 0.. { + let raw = match factory6 { + Some(factory) => { + let mut adapter2 = native::WeakPtr::::null(); + let hr = factory.EnumAdapterByGpuPreference( + cur_index, + dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &dxgi1_2::IDXGIAdapter2::uuidof(), + adapter2.mut_void(), + ); + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + adapter2 + } + None => { + let mut adapter1 = native::WeakPtr::::null(); + let hr = self + .factory + .EnumAdapters1(cur_index, adapter1.mut_void() as *mut *mut _); + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + match adapter1.cast::().into_result() { + Ok(adapter2) => { + adapter1.destroy(); + adapter2 + } + Err(err) => { + log::error!("Failed casting to Adapter2: {}", err); + break; + } + } + } + }; + + adapters.extend(super::Adapter::expose(raw, &self.library, self.flags)); + } + adapters + } +} diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs new file mode 100644 index 0000000000..8dea5cc9e4 --- /dev/null +++ b/wgpu-hal/src/dx12/mod.rs @@ -0,0 +1,681 @@ +/*! 
+# DirectX12 API internals. + +## Pipeline Layout + +!*/ + +mod adapter; +mod command; +mod conv; +mod descriptor; +mod device; +mod instance; + +use arrayvec::ArrayVec; +use parking_lot::Mutex; +use std::{borrow::Cow, mem, num::NonZeroU32, ptr, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, + um::{d3d12, synchapi, winbase, winnt}, + Interface as _, +}; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +trait HResult { + fn into_result(self) -> Result>; + fn into_device_result(self, description: &str) -> Result; +} +impl HResult<()> for i32 { + fn into_result(self) -> Result<(), Cow<'static, str>> { + if self >= 0 { + return Ok(()); + } + let description = match self { + winerror::E_UNEXPECTED => "unexpected", + winerror::E_NOTIMPL => "not implemented", + winerror::E_OUTOFMEMORY => "out of memory", + winerror::E_INVALIDARG => "invalid argument", + _ => return Err(Cow::Owned(format!("0x{:X}", self as u32))), + }; + Err(Cow::Borrowed(description)) + } + fn into_device_result(self, description: &str) -> Result<(), crate::DeviceError> { + self.into_result().map_err(|err| { + log::error!("{} failed: {}", description, err); + if self == winerror::E_OUTOFMEMORY { + crate::DeviceError::OutOfMemory + } else { + crate::DeviceError::Lost + } + }) + } +} + +impl HResult for (T, i32) { + fn into_result(self) -> Result> { + self.1.into_result().map(|()| self.0) + } + fn into_device_result(self, description: &str) -> Result { + self.1.into_device_result(description).map(|()| self.0) + } +} + +const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; + +pub struct Instance { + factory: native::Factory4, + library: Arc, + _lib_dxgi: native::DxgiLib, + flags: crate::InstanceFlags, +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +struct SwapChain { + raw: native::WeakPtr, + // need to associate raw image pointers with the swapchain so they can be properly released + // when the swapchain is destroyed + resources: Vec, + waitable: winnt::HANDLE, + acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: wgt::Extent3d, +} + +pub struct Surface { + factory: native::WeakPtr, + wnd_handle: windef::HWND, + swap_chain: Option, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + Unified { cache_coherent: bool }, + NonUnified, +} + +#[derive(Debug, Clone, Copy)] +struct PrivateCapabilities { + heterogeneous_resource_heaps: bool, + memory_architecture: MemoryArchitecture, + shader_debug_info: bool, + heap_create_not_zeroed: bool, +} + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. 
+ avoid_cpu_descriptor_overwrites: bool, +} + +pub struct Adapter { + raw: native::WeakPtr, + device: native::Device, + library: Arc, + private_caps: PrivateCapabilities, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] + workarounds: Workarounds, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +/// Helper structure for waiting for GPU. +struct Idler { + fence: native::Fence, + event: native::Event, +} + +impl Idler { + unsafe fn destroy(self) { + self.fence.destroy(); + } +} + +struct CommandSignatures { + draw: native::CommandSignature, + draw_indexed: native::CommandSignature, + dispatch: native::CommandSignature, +} + +impl CommandSignatures { + unsafe fn destroy(&self) { + self.draw.destroy(); + self.draw_indexed.destroy(); + self.dispatch.destroy(); + } +} + +struct DeviceShared { + features: wgt::Features, + zero_buffer: native::Resource, + cmd_signatures: CommandSignatures, + heap_views: descriptor::GeneralHeap, + heap_samplers: descriptor::GeneralHeap, +} + +impl DeviceShared { + unsafe fn destroy(&self) { + self.zero_buffer.destroy(); + self.cmd_signatures.destroy(); + self.heap_views.raw.destroy(); + self.heap_samplers.raw.destroy(); + } +} + +pub struct Device { + raw: native::Device, + present_queue: native::CommandQueue, + idler: Idler, + private_caps: PrivateCapabilities, + shared: Arc, + // CPU only pools + rtv_pool: Mutex, + dsv_pool: Mutex, + srv_uav_pool: Mutex, + sampler_pool: Mutex, + // library + library: Arc, +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue { + raw: native::CommandQueue, + temp_lists: Vec, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +impl Drop for Queue { + fn drop(&mut self) { + unsafe { + self.raw.destroy(); + } + } +} + +#[derive(Default)] +struct Temp { + marker: Vec, + barriers: Vec, +} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.barriers.clear(); + } +} + +struct PassResolve { + src: (native::Resource, u32), + dst: (native::Resource, u32), + format: dxgiformat::DXGI_FORMAT, +} + +enum PassKind { + Render, + Compute, + Transfer, +} + +struct PassState { + has_label: bool, + resolves: ArrayVec, + vertex_buffers: [d3d12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: usize, + kind: PassKind, +} + +impl PassState { + fn new() -> Self { + PassState { + has_label: false, + resolves: ArrayVec::new(), + vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: 0, + kind: PassKind::Transfer, + } + } + + fn clear(&mut self) { + self.has_label = false; + self.resolves.clear(); + self.dirty_vertex_buffers = 0; + self.kind = PassKind::Transfer; + } +} + +pub struct CommandEncoder { + allocator: native::CommandAllocator, + device: native::Device, + shared: Arc, + list: Option, + free_lists: Vec, + pass: PassState, + temp: Temp, +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +pub struct CommandBuffer { + raw: native::GraphicsCommandList, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer { + resource: native::Resource, + size: wgt::BufferAddress, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl crate::BufferBinding<'_, Api> { + fn resolve_size(&self) -> wgt::BufferAddress { + match self.size { + Some(size) => size.get(), + None => self.buffer.size - self.offset, + } + } + + fn resolve_address(&self) -> 
wgt::BufferAddress { + self.buffer.resource.gpu_virtual_address() + self.offset + } +} + +#[derive(Debug)] +pub struct Texture { + resource: native::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +impl Texture { + fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + self.size.depth_or_array_layers + } + wgt::TextureDimension::D3 => 1, + } + } + + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { + mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count + } + + fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { + self.calc_subresource(base.mip_level, base.array_layer, 0) + } +} + +#[derive(Debug)] +pub struct TextureView { + raw_format: dxgiformat::DXGI_FORMAT, + target_base: (native::Resource, u32), + handle_srv: Option, + handle_uav: Option, + handle_rtv: Option, + handle_dsv_ro: Option, + handle_dsv_rw: Option, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +#[derive(Debug)] +pub struct Sampler { + handle: descriptor::Handle, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Debug)] +pub struct QuerySet { + raw: native::QueryHeap, + raw_ty: d3d12::D3D12_QUERY_TYPE, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + raw: native::Fence, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +pub struct BindGroupLayout { + /// Sorted list of entries. + entries: Vec, + cpu_heap_views: Option, + cpu_heap_samplers: Option, + copy_counts: Vec, // all 1's +} + +enum BufferViewKind { + Constant, + ShaderResource, + UnorderedAccess, +} + +#[derive(Debug)] +pub struct BindGroup { + handle_views: Option, + handle_samplers: Option, + dynamic_buffers: Vec, +} + +bitflags::bitflags! { + struct TableTypes: u8 { + const SRV_CBV_UAV = 0x1; + const SAMPLERS = 0x2; + } +} + +struct BindGroupInfo { + base_root_index: u32, + tables: TableTypes, + dynamic_buffers: Vec, +} + +pub struct PipelineLayout { + raw: native::RootSignature, + // Storing for each associated bind group, which tables we created + // in the root signature. This is required for binding descriptor sets. 
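    // At bind time, `BindGroupInfo::base_root_index` is the first root
    // parameter belonging to that group; the parameters then follow the same
    // order used in `create_pipeline_layout`: the SRV/CBV/UAV table (if any),
    // then the sampler table (if any), then one root descriptor per dynamic
    // buffer.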
+ bind_group_infos: ArrayVec, + naga_options: naga::back::hlsl::Options, +} + +unsafe impl Send for PipelineLayout {} +unsafe impl Sync for PipelineLayout {} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, +} + +pub struct RenderPipeline { + raw: native::PipelineState, + signature: native::RootSignature, + topology: d3d12::D3D12_PRIMITIVE_TOPOLOGY, + vertex_strides: [Option; crate::MAX_VERTEX_BUFFERS], +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: native::PipelineState, + signature: native::RootSignature, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +impl SwapChain { + unsafe fn release_resources(self) -> native::WeakPtr { + for resource in self.resources { + resource.destroy(); + } + self.raw + } + + unsafe fn wait(&mut self, timeout_ms: u32) -> Result { + match synchapi::WaitForSingleObject(self.waitable, timeout_ms) { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::SurfaceError::Lost) + } + } + } +} + +impl crate::Surface for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + match config.present_mode { + wgt::PresentMode::Immediate => { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + _ => {} + } + + let non_srgb_format = conv::map_texture_format_nosrgb(config.format); + + let swap_chain = match self.swap_chain.take() { + //Note: this path doesn't properly re-initialize all of the things + Some(sc) => { + // can't have image resources in flight used by GPU + let _ = device.wait_idle(); + + let raw = sc.release_resources(); + let result = raw.ResizeBuffers( + config.swap_chain_size, + config.extent.width, + config.extent.height, + non_srgb_format, + flags, + ); + if let Err(err) = result.into_result() { + log::error!("ResizeBuffers failed: {}", err); + return Err(crate::SurfaceError::Other("window is in use")); + } + raw + } + None => { + let mut swap_chain1 = native::WeakPtr::::null(); + + let raw_desc = dxgi1_2::DXGI_SWAP_CHAIN_DESC1 { + AlphaMode: conv::map_acomposite_alpha_mode(config.composite_alpha_mode), + BufferCount: config.swap_chain_size, + Width: config.extent.width, + Height: config.extent.height, + Format: non_srgb_format, + Flags: flags, + BufferUsage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Scaling: dxgi1_2::DXGI_SCALING_STRETCH, + Stereo: 0, + SwapEffect: dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD, + }; + + let hr = self.factory.CreateSwapChainForHwnd( + device.present_queue.as_mut_ptr() as *mut _, + self.wnd_handle, + &raw_desc, + ptr::null(), + ptr::null_mut(), + swap_chain1.mut_void() as *mut *mut _, + ); + + if let Err(err) = hr.into_result() { + log::error!("SwapChain creation error: {}", err); + return Err(crate::SurfaceError::Other("swap chain creation")); + } + + match swap_chain1.cast::().into_result() { + Ok(swap_chain3) => { + swap_chain1.destroy(); + swap_chain3 + } + Err(err) => { + log::error!("Unable to cast swap chain: {}", err); + return Err(crate::SurfaceError::Other("swap chain cast to 3")); + } + } + } + }; + + // Disable automatic Alt+Enter handling by DXGI. 
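        // DXGI_MWA_NO_WINDOW_CHANGES (0x1) and DXGI_MWA_NO_ALT_ENTER (0x2) are
        // the standard MakeWindowAssociation flags; passing them stops DXGI
        // from watching the window's message queue and from toggling
        // fullscreen in response to Alt+Enter, a transition this backend does
        // not otherwise handle.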
+ const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + self.factory.MakeWindowAssociation( + self.wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ); + + swap_chain.SetMaximumFrameLatency(config.swap_chain_size); + let waitable = swap_chain.GetFrameLatencyWaitableObject(); + + let mut resources = vec![native::Resource::null(); config.swap_chain_size as usize]; + for (i, res) in resources.iter_mut().enumerate() { + swap_chain.GetBuffer(i as _, &d3d12::ID3D12Resource::uuidof(), res.mut_void()); + } + + self.swap_chain = Some(SwapChain { + raw: swap_chain, + resources, + waitable, + acquired_count: 0, + present_mode: config.present_mode, + format: config.format, + size: config.extent, + }); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Device) { + if let Some(mut sc) = self.swap_chain.take() { + let _ = sc.wait(winbase::INFINITE); + //TODO: this shouldn't be needed, + // but it complains that the queue is still used otherwise + let _ = device.wait_idle(); + let raw = sc.release_resources(); + raw.destroy(); + } + } + + unsafe fn acquire_texture( + &mut self, + timeout_ms: u32, + ) -> Result>, crate::SurfaceError> { + let sc = self.swap_chain.as_mut().unwrap(); + + sc.wait(timeout_ms)?; + + let base_index = sc.raw.GetCurrentBackBufferIndex() as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index], + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: Texture) { + let sc = self.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + } +} + +impl crate::Queue for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + self.temp_lists.clear(); + for cmd_buf in command_buffers { + self.temp_lists.push(cmd_buf.raw.as_list()); + } + + self.raw.execute_command_lists(&self.temp_lists); + + if let Some((fence, value)) = signal_fence { + self.raw + .signal(fence.raw, value) + .into_device_result("Signal fence")?; + } + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Surface, + _texture: Texture, + ) -> Result<(), crate::SurfaceError> { + let sc = surface.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + + let (interval, flags) = match sc.present_mode { + wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Fifo => (1, 0), + wgt::PresentMode::Mailbox => (1, 0), + }; + sc.raw.Present(interval, flags); + + Ok(()) + } +} diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 3c6a51eaec..c2710e4532 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -322,7 +322,7 @@ impl crate::CommandEncoder for Encoder { unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} unsafe fn set_stencil_reference(&mut self, value: u32) {} - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) {} + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) {} unsafe fn draw( &mut self, diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 00f5d7ea0d..153755d8f9 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ 
-204,7 +204,9 @@ impl super::Adapter { vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) }; - let mut features = wgt::Features::empty() | wgt::Features::TEXTURE_COMPRESSION_ETC2; + let mut features = wgt::Features::empty() + | wgt::Features::TEXTURE_COMPRESSION_ETC2 + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES; features.set( wgt::Features::DEPTH_CLAMPING, extensions.contains("GL_EXT_depth_clamp"), diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 5fb632f330..b5e8ff8355 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -767,14 +767,8 @@ impl crate::CommandEncoder for super::CommandEncoder { self.state.stencil.back.reference = value; self.rebind_stencil_func(); } - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) { - let color = [ - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ]; - self.cmd_buffer.commands.push(C::SetBlendConstant(color)); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.cmd_buffer.commands.push(C::SetBlendConstant(*color)); } unsafe fn draw( diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index fbba13b96e..a5771be032 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -70,8 +70,8 @@ impl super::AdapterShared { | Tf::Bc4RSnorm | Tf::Bc5RgUnorm | Tf::Bc5RgSnorm - | Tf::Bc6hRgbSfloat | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbSfloat | Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => unimplemented!(), Tf::Etc2RgbUnorm => (glow::COMPRESSED_RGB8_ETC2, glow::RGB, 0), diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 9e4cf285cd..2b0dbc522a 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -201,52 +201,51 @@ impl super::Queue { ref copy, } => { //TODO: cubemaps - //TODO: how is depth handled? 
+ //TODO: handle 3D copies gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)); - for layer in 0..copy.size.depth_or_array_layers as i32 { - if is_3d_target(src_target) { - //TODO: handle GLES without framebuffer_texture_3d - gl.framebuffer_texture_layer( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - Some(src), - copy.src_base.mip_level as i32, - copy.src_base.origin.z as i32 + layer, - ); - } else { - gl.framebuffer_texture_2d( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - src_target, - Some(src), - copy.src_base.mip_level as i32, - ); - } - gl.bind_texture(dst_target, Some(dst)); - if is_3d_target(dst_target) { - gl.copy_tex_sub_image_3d( - dst_target, - copy.dst_base.mip_level as i32, - copy.dst_base.origin.x as i32, - copy.dst_base.origin.y as i32, - copy.dst_base.origin.z as i32 + layer, - copy.src_base.origin.x as i32, - copy.src_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - ); - } else { - gl.copy_tex_sub_image_2d( - dst_target, - copy.dst_base.mip_level as i32, - copy.dst_base.origin.x as i32, - copy.dst_base.origin.y as i32, - copy.src_base.origin.x as i32, - copy.src_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - ); - } + if is_3d_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.src_base.mip_level as i32, + copy.src_base.array_layer as i32, + ); + } else { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.src_base.mip_level as i32, + ); + } + + gl.bind_texture(dst_target, Some(dst)); + if is_3d_target(dst_target) { + gl.copy_tex_sub_image_3d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.dst_base.origin.z as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ); + } else { + gl.copy_tex_sub_image_2d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ); } } C::CopyBufferToTexture { @@ -286,7 +285,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.external, format_desc.data_type, unpack_data, @@ -306,26 +305,18 @@ impl super::Queue { ); } glow::TEXTURE_CUBE_MAP => { - let mut offset = copy.buffer_layout.offset as u32; - for face_index in 0..copy.size.depth_or_array_layers { - gl.tex_sub_image_2d( - CUBEMAP_FACES - [(copy.texture_base.origin.z + face_index) as usize], - copy.texture_base.mip_level as i32, - copy.texture_base.origin.x as i32, - copy.texture_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - format_desc.external, - format_desc.data_type, - glow::PixelUnpackData::BufferOffset(offset), - ); - offset += copy - .buffer_layout - .rows_per_image - .map_or(0, |rpi| rpi.get()) - * copy.buffer_layout.bytes_per_row.map_or(0, |bpr| bpr.get()); - } + let offset = copy.buffer_layout.offset as u32; + gl.tex_sub_image_2d( + CUBEMAP_FACES[copy.texture_base.array_layer as usize], + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as 
i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + glow::PixelUnpackData::BufferOffset(offset), + ); } glow::TEXTURE_CUBE_MAP_ARRAY => { //Note: not sure if this is correct! @@ -337,7 +328,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.external, format_desc.data_type, unpack_data, @@ -349,10 +340,9 @@ impl super::Queue { let bytes_per_image = copy.buffer_layout.rows_per_image.map_or(1, |rpi| rpi.get()) * copy.buffer_layout.bytes_per_row.map_or(1, |bpr| bpr.get()); - let offset_end = copy.buffer_layout.offset as u32 - + bytes_per_image * copy.size.depth_or_array_layers; + let offset = copy.buffer_layout.offset as u32; let unpack_data = glow::CompressedPixelUnpackData::BufferRange( - copy.buffer_layout.offset as u32..offset_end, + offset..offset + bytes_per_image, ); match dst_target { glow::TEXTURE_3D | glow::TEXTURE_2D_ARRAY => { @@ -364,7 +354,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.internal, unpack_data, ); @@ -382,23 +372,18 @@ impl super::Queue { ); } glow::TEXTURE_CUBE_MAP => { - let mut offset = copy.buffer_layout.offset as u32; - for face_index in 0..copy.size.depth_or_array_layers { - gl.compressed_tex_sub_image_2d( - CUBEMAP_FACES - [(copy.texture_base.origin.z + face_index) as usize], - copy.texture_base.mip_level as i32, - copy.texture_base.origin.x as i32, - copy.texture_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - format_desc.internal, - glow::CompressedPixelUnpackData::BufferRange( - offset..offset + bytes_per_image, - ), - ); - offset += bytes_per_image; - } + gl.compressed_tex_sub_image_2d( + CUBEMAP_FACES[copy.texture_base.array_layer as usize], + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.internal, + glow::CompressedPixelUnpackData::BufferRange( + offset..offset + bytes_per_image, + ), + ); } glow::TEXTURE_CUBE_MAP_ARRAY => { //Note: not sure if this is correct! 
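With this change, every GLES copy command operates on exactly one array layer (via `array_layer` on the copy base and `depth` on the new `CopyExtent`), so a copy that spans several layers has to be unrolled before it reaches the backend, presumably by wgpu-core. A minimal sketch of that unrolling, using simplified stand-ins rather than the real wgpu-hal definitions:

```rust
// Simplified stand-ins for the wgpu-hal copy types touched in this diff;
// field selection here is illustrative, not the actual definitions.
#[derive(Clone, Copy, Debug)]
struct CopyExtent { width: u32, height: u32, depth: u32 }

#[derive(Clone, Debug)]
struct TextureCopyBase { mip_level: u32, array_layer: u32 }

#[derive(Clone, Debug)]
struct TextureCopy { src_base: TextureCopyBase, dst_base: TextureCopyBase, size: CopyExtent }

/// Hypothetical helper: expand a copy covering `layer_count` array layers into
/// one single-layer region per layer, as the backends now expect.
fn split_layers(base: &TextureCopy, layer_count: u32) -> impl Iterator<Item = TextureCopy> + '_ {
    (0..layer_count).map(move |rel| {
        let mut region = base.clone();
        region.src_base.array_layer += rel;
        region.dst_base.array_layer += rel;
        region
    })
}

fn main() {
    let base = TextureCopy {
        src_base: TextureCopyBase { mip_level: 0, array_layer: 0 },
        dst_base: TextureCopyBase { mip_level: 0, array_layer: 0 },
        size: CopyExtent { width: 256, height: 256, depth: 1 },
    };
    // A 6-layer (e.g. cubemap) copy becomes six single-layer regions.
    for region in split_layers(&base, 6) {
        println!("{:?}", region);
    }
}
```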
@@ -410,7 +395,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.internal, unpack_data, ); @@ -445,45 +430,37 @@ impl super::Queue { .map_or(copy.size.width, |bpr| { bpr.get() / format_info.block_size as u32 }); - let column_texels = copy - .buffer_layout - .rows_per_image - .map_or(copy.size.height, |rpi| rpi.get()); gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32); gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(dst)); gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)); - for layer in 0..copy.size.depth_or_array_layers { - let offset = copy.buffer_layout.offset as u32 - + layer * column_texels * row_texels * format_info.block_size as u32; - if is_3d_target(src_target) { - //TODO: handle GLES without framebuffer_texture_3d - gl.framebuffer_texture_layer( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - Some(src), - copy.texture_base.mip_level as i32, - copy.texture_base.origin.z as i32 + layer as i32, - ); - } else { - gl.framebuffer_texture_2d( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - src_target, - Some(src), - copy.texture_base.mip_level as i32, - ); - } - gl.read_pixels( - copy.texture_base.origin.x as i32, - copy.texture_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - format_desc.external, - format_desc.data_type, - glow::PixelPackData::BufferOffset(offset), + if is_3d_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + copy.texture_base.array_layer as i32, + ); + } else { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.texture_base.mip_level as i32, ); } + gl.read_pixels( + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + glow::PixelPackData::BufferOffset(copy.buffer_layout.offset as u32), + ); } C::SetIndexBuffer(buffer) => { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(buffer)); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 00380cc664..4e42368746 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -42,8 +42,12 @@ )] #[cfg(all(feature = "metal", not(any(target_os = "macos", target_os = "ios"))))] -compile_error!("Metal backend enabled on non-Apple OS. If your project is not using resolver=\"2\" in Cargo.toml, it should."); +compile_error!("Metal API enabled on non-Apple OS. If your project is not using resolver=\"2\" in Cargo.toml, it should."); +#[cfg(all(feature = "dx12", not(windows)))] +compile_error!("DX12 API enabled on non-Windows OS. If your project is not using resolver=\"2\" in Cargo.toml, it should."); +#[cfg(all(feature = "dx12", windows))] +mod dx12; mod empty; #[cfg(feature = "gles")] mod gles; @@ -54,6 +58,8 @@ mod vulkan; pub mod util; pub mod api { + #[cfg(feature = "dx12")] + pub use super::dx12::Api as Dx12; pub use super::empty::Api as Empty; #[cfg(feature = "gles")] pub use super::gles::Api as Gles; @@ -345,6 +351,8 @@ pub trait CommandEncoder: Send + Sync { where T: Iterator; + /// Copy from one texture to another. + /// Works with a single array layer. /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. 
     unsafe fn copy_texture_to_texture<T>(
         &mut self,
@@ -355,11 +363,15 @@ pub trait CommandEncoder<A: Api>: Send + Sync {
     ) where
         T: Iterator<Item = TextureCopy>;
 
+    /// Copy from buffer to texture.
+    /// Works with a single array layer.
     /// Note: `dst` current usage has to be `TextureUses::COPY_DST`.
     unsafe fn copy_buffer_to_texture<T>(&mut self, src: &A::Buffer, dst: &A::Texture, regions: T)
     where
         T: Iterator<Item = BufferTextureCopy>;
 
+    /// Copy from texture to buffer.
+    /// Works with a single array layer.
     unsafe fn copy_texture_to_buffer<T>(
         &mut self,
         src: &A::Texture,
@@ -425,7 +437,7 @@ pub trait CommandEncoder<A: Api>: Send + Sync {
     unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>);
     unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>);
     unsafe fn set_stencil_reference(&mut self, value: u32);
-    unsafe fn set_blend_constants(&mut self, color: &wgt::Color);
+    unsafe fn set_blend_constants(&mut self, color: &[f32; 4]);
 
     unsafe fn draw(
         &mut self,
@@ -614,6 +626,7 @@ bitflags::bitflags! {
         /// If a usage is not ordered, then even if it doesn't change between draw calls, there
         /// still need to be pipeline barriers inserted for synchronization.
         const ORDERED = Self::READ_ALL.bits | Self::COPY_DST.bits | Self::COLOR_TARGET.bits | Self::DEPTH_STENCIL_WRITE.bits;
+        //TODO: remove this
         const UNINITIALIZED = 0xFFFF;
     }
 }
@@ -999,35 +1012,41 @@ pub struct BufferCopy {
 
 #[derive(Clone, Debug)]
 pub struct TextureCopyBase {
-    pub origin: wgt::Origin3d,
     pub mip_level: u32,
+    pub array_layer: u32,
+    /// Origin within a texture.
+    /// Note: for 1D and 2D textures, Z must be 0.
+    pub origin: wgt::Origin3d,
     pub aspect: FormatAspects,
 }
 
+#[derive(Clone, Copy, Debug)]
+pub struct CopyExtent {
+    pub width: u32,
+    pub height: u32,
+    pub depth: u32,
+}
+
 #[derive(Clone, Debug)]
 pub struct TextureCopy {
     pub src_base: TextureCopyBase,
     pub dst_base: TextureCopyBase,
-    pub size: wgt::Extent3d,
+    pub size: CopyExtent,
 }
 
 #[derive(Clone, Debug)]
 pub struct BufferTextureCopy {
     pub buffer_layout: wgt::ImageDataLayout,
     pub texture_base: TextureCopyBase,
-    pub size: wgt::Extent3d,
+    pub size: CopyExtent,
 }
 
 #[derive(Debug)]
 pub struct Attachment<'a, A: Api> {
     pub view: &'a A::TextureView,
-    /// Contains either a single mutating usage as a target, or a valid combination
-    /// of read-only usages.
+    /// Contains either a single mutating usage as a target,
+    /// or a valid combination of read-only usages.
     pub usage: TextureUses,
-    /// Defines the boundary usages for the attachment.
-    /// It is expected to begin a render pass with `boundary_usage.start` usage,
-    /// and will end it with `boundary_usage.end` usage.
- pub boundary_usage: Range, } // Rust gets confused about the impl requirements for `A` @@ -1036,7 +1055,6 @@ impl Clone for Attachment<'_, A> { Self { view: self.view, usage: self.usage, - boundary_usage: self.boundary_usage.clone(), } } } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 927fff2471..0814df3b70 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -199,8 +199,8 @@ impl crate::Adapter for super::Adapter { | Tf::Bc4RSnorm | Tf::Bc5RgUnorm | Tf::Bc5RgSnorm - | Tf::Bc6hRgbSfloat | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbSfloat | Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => { if pc.format_bc { @@ -889,6 +889,7 @@ impl super::PrivateCapabilities { .flags .set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true); + let base = wgt::Limits::default(); crate::Capabilities { limits: wgt::Limits { max_texture_dimension_1d: self.max_texture_size as u32, @@ -896,18 +897,20 @@ impl super::PrivateCapabilities { max_texture_dimension_3d: self.max_texture_3d_size as u32, max_texture_array_layers: self.max_texture_layers as u32, max_bind_groups: 8, - max_dynamic_uniform_buffers_per_pipeline_layout: 8, - max_dynamic_storage_buffers_per_pipeline_layout: 4, - max_sampled_textures_per_shader_stage: 16, + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: base.max_sampled_textures_per_shader_stage, max_samplers_per_shader_stage: self.max_samplers_per_stage, - max_storage_buffers_per_shader_stage: 8, - max_storage_textures_per_shader_stage: 8, + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base.max_storage_textures_per_shader_stage, max_uniform_buffers_per_shader_stage: 12, max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, - max_vertex_buffers: 8, - max_vertex_attributes: 16, - max_vertex_buffer_array_stride: 2048, + max_vertex_buffers: base.max_vertex_buffers, + max_vertex_attributes: base.max_vertex_attributes, + max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, max_push_constant_size: 0x1000, }, alignments: crate::Alignments { diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 9fbaf3c042..5fb648e151 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -156,22 +156,20 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - let (src_slice, src_origin) = conv::map_origin(©.src_base.origin, src.raw_type); - let (dst_slice, dst_origin) = conv::map_origin(©.dst_base.origin, dst.raw_type); - let (slice_count, extent) = conv::map_extent(©.size, src.raw_type); - for slice in 0..slice_count { - encoder.copy_from_texture( - &src.raw, - src_slice + slice, - copy.src_base.mip_level as u64, - src_origin, - extent, - &dst.raw, - dst_slice + slice, - copy.dst_base.mip_level as u64, - dst_origin, - ); - } + let src_origin = conv::map_origin(©.src_base.origin); + let dst_origin = conv::map_origin(©.dst_base.origin); + let extent = conv::map_copy_extent(©.size); + encoder.copy_from_texture( + &src.raw, + copy.src_base.array_layer as u64, + copy.src_base.mip_level as u64, + src_origin, + extent, + &dst.raw, + copy.dst_base.array_layer as u64, + 
copy.dst_base.mip_level as u64, + dst_origin, + ); } } @@ -185,8 +183,8 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - let (dst_slice, dst_origin) = conv::map_origin(©.texture_base.origin, dst.raw_type); - let (slice_count, extent) = conv::map_extent(©.size, dst.raw_type); + let dst_origin = conv::map_origin(©.texture_base.origin); + let extent = conv::map_copy_extent(©.size); let bytes_per_row = copy .buffer_layout .bytes_per_row @@ -195,21 +193,18 @@ impl crate::CommandEncoder for super::CommandEncoder { .buffer_layout .rows_per_image .map_or(0, |v| v.get() as u64 * bytes_per_row); - for slice in 0..slice_count { - let offset = copy.buffer_layout.offset + bytes_per_image * slice; - encoder.copy_from_buffer_to_texture( - &src.raw, - offset, - bytes_per_row, - bytes_per_image, - extent, - &dst.raw, - dst_slice + slice, - copy.texture_base.mip_level as u64, - dst_origin, - mtl::MTLBlitOption::empty(), - ); - } + encoder.copy_from_buffer_to_texture( + &src.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + extent, + &dst.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + dst_origin, + mtl::MTLBlitOption::empty(), + ); } } @@ -224,8 +219,8 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - let (src_slice, src_origin) = conv::map_origin(©.texture_base.origin, src.raw_type); - let (slice_count, extent) = conv::map_extent(©.size, src.raw_type); + let src_origin = conv::map_origin(©.texture_base.origin); + let extent = conv::map_copy_extent(©.size); let bytes_per_row = copy .buffer_layout .bytes_per_row @@ -234,21 +229,18 @@ impl crate::CommandEncoder for super::CommandEncoder { .buffer_layout .rows_per_image .map_or(0, |v| v.get() as u64 * bytes_per_row); - for slice in 0..slice_count { - let offset = copy.buffer_layout.offset + bytes_per_image * slice; - encoder.copy_from_texture_to_buffer( - &src.raw, - src_slice + slice, - copy.texture_base.mip_level as u64, - src_origin, - extent, - &dst.raw, - offset, - bytes_per_row, - bytes_per_image, - mtl::MTLBlitOption::empty(), - ); - } + encoder.copy_from_texture_to_buffer( + &src.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + src_origin, + extent, + &dst.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + mtl::MTLBlitOption::empty(), + ); } } @@ -681,14 +673,9 @@ impl crate::CommandEncoder for super::CommandEncoder { let encoder = self.state.render.as_ref().unwrap(); encoder.set_stencil_front_back_reference_value(value, value); } - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) { + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { let encoder = self.state.render.as_ref().unwrap(); - encoder.set_blend_color( - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ); + encoder.set_blend_color(color[0], color[1], color[2], color[3]); } unsafe fn draw( diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index 61ae6cab14..b1c7e0b303 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -261,34 +261,20 @@ pub fn map_range(range: &crate::MemoryRange) -> mtl::NSRange { } } -pub fn map_extent(extent: &wgt::Extent3d, raw_type: mtl::MTLTextureType) -> (u64, mtl::MTLSize) { - let (depth, array_layers) = match raw_type { - mtl::MTLTextureType::D3 => (extent.depth_or_array_layers as u64, 1), - _ => (1, extent.depth_or_array_layers as 
u64), - }; - ( - array_layers, - mtl::MTLSize { - width: extent.width as u64, - height: extent.height as u64, - depth, - }, - ) +pub fn map_copy_extent(extent: &crate::CopyExtent) -> mtl::MTLSize { + mtl::MTLSize { + width: extent.width as u64, + height: extent.height as u64, + depth: extent.depth as u64, + } } -pub fn map_origin(origin: &wgt::Origin3d, raw_type: mtl::MTLTextureType) -> (u64, mtl::MTLOrigin) { - let (z, slice) = match raw_type { - mtl::MTLTextureType::D3 => (origin.z as u64, 0), - _ => (0, origin.z as u64), - }; - ( - slice, - mtl::MTLOrigin { - x: origin.x as u64, - y: origin.y as u64, - z, - }, - ) +pub fn map_origin(origin: &wgt::Origin3d) -> mtl::MTLOrigin { + mtl::MTLOrigin { + x: origin.x as u64, + y: origin.y as u64, + z: origin.z as u64, + } } pub fn map_store_action(store: bool, resolve: bool) -> mtl::MTLStoreAction { diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 23caea67e6..358de98e5f 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -676,7 +676,7 @@ impl crate::Device for super::Device { match shader { crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga }), crate::ShaderInput::SpirV(_) => { - unreachable!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") } } } diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 9b2f7f4630..d87a7361e1 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -18,13 +18,11 @@ impl super::Texture { where T: Iterator, { - let dim = self.dim; let aspects = self.aspects; let fi = self.format_info; regions.map(move |r| { - let (layer_count, image_extent) = conv::map_extent(r.size, dim); let (image_subresource, image_offset) = - conv::map_subresource_layers(&r.texture_base, dim, aspects, layer_count); + conv::map_subresource_layers(&r.texture_base, aspects); vk::BufferImageCopy { buffer_offset: r.buffer_layout.offset, buffer_row_length: r.buffer_layout.bytes_per_row.map_or(0, |bpr| { @@ -36,7 +34,7 @@ impl super::Texture { .map_or(0, |rpi| rpi.get() * fi.block_dimensions.1 as u32), image_subresource, image_offset, - image_extent, + image_extent: conv::map_copy_extent(&r.size), } }) } @@ -228,17 +226,16 @@ impl crate::CommandEncoder for super::CommandEncoder { let src_layout = conv::derive_image_layout(src_usage, src.aspects); let vk_regions_iter = regions.map(|r| { - let (layer_count, extent) = conv::map_extent(r.size, src.dim); let (src_subresource, src_offset) = - conv::map_subresource_layers(&r.src_base, src.dim, src.aspects, layer_count); + conv::map_subresource_layers(&r.src_base, src.aspects); let (dst_subresource, dst_offset) = - conv::map_subresource_layers(&r.dst_base, dst.dim, dst.aspects, layer_count); + conv::map_subresource_layers(&r.dst_base, dst.aspects); vk::ImageCopy { src_subresource, src_offset, dst_subresource, dst_offset, - extent, + extent: conv::map_copy_extent(&r.size), } }); @@ -571,16 +568,8 @@ impl crate::CommandEncoder for super::CommandEncoder { .raw .cmd_set_stencil_reference(self.active, vk::StencilFaceFlags::all(), value); } - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) { - let vk_constants = [ - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ]; - self.device - .raw - .cmd_set_blend_constants(self.active, &vk_constants); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.device.raw.cmd_set_blend_constants(self.active, color); } unsafe fn 
draw( diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 41a33082d0..aa057fc678 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -66,8 +66,8 @@ impl super::PrivateCapabilities { Tf::Bc4RSnorm => F::BC4_SNORM_BLOCK, Tf::Bc5RgUnorm => F::BC5_UNORM_BLOCK, Tf::Bc5RgSnorm => F::BC5_SNORM_BLOCK, - Tf::Bc6hRgbSfloat => F::BC6H_SFLOAT_BLOCK, Tf::Bc6hRgbUfloat => F::BC6H_UFLOAT_BLOCK, + Tf::Bc6hRgbSfloat => F::BC6H_SFLOAT_BLOCK, Tf::Bc7RgbaUnorm => F::BC7_UNORM_BLOCK, Tf::Bc7RgbaUnormSrgb => F::BC7_SRGB_BLOCK, Tf::Etc2RgbUnorm => F::ETC2_R8G8B8_UNORM_BLOCK, @@ -119,9 +119,7 @@ impl crate::Attachment<'_, super::Api> { let aspects = self.view.aspects(); super::AttachmentKey { format: caps.map_texture_format(self.view.attachment.view_format), - layout_pre: derive_image_layout(self.boundary_usage.start, aspects), - layout_in: derive_image_layout(self.usage, aspects), - layout_post: derive_image_layout(self.boundary_usage.end, aspects), + layout: derive_image_layout(self.usage, aspects), ops, } } @@ -250,7 +248,7 @@ pub fn map_texture_usage_to_barrier( access |= vk::AccessFlags::SHADER_WRITE; } - if usage == crate::TextureUses::UNINITIALIZED { + if usage == crate::TextureUses::UNINITIALIZED || usage.is_empty() { ( vk::PipelineStageFlags::TOP_OF_PIPE, vk::AccessFlags::empty(), @@ -352,42 +350,6 @@ pub fn map_aspects(aspects: crate::FormatAspects) -> vk::ImageAspectFlags { flags } -pub fn map_origin( - origin: wgt::Origin3d, - texture_dim: wgt::TextureDimension, -) -> (u32, vk::Offset3D) { - let (z, array_layer) = match texture_dim { - wgt::TextureDimension::D3 => (origin.z as i32, 0), - _ => (0, origin.z), - }; - ( - array_layer, - vk::Offset3D { - x: origin.x as i32, - y: origin.y as i32, - z, - }, - ) -} - -pub fn map_extent( - extent: wgt::Extent3d, - texture_dim: wgt::TextureDimension, -) -> (u32, vk::Extent3D) { - let (depth, array_layers) = match texture_dim { - wgt::TextureDimension::D3 => (extent.depth_or_array_layers, 1), - _ => (1, extent.depth_or_array_layers), - }; - ( - array_layers, - vk::Extent3D { - width: extent.width, - height: extent.height, - depth, - }, - ) -} - pub fn map_attachment_ops( op: crate::AttachmentOps, ) -> (vk::AttachmentLoadOp, vk::AttachmentStoreOp) { @@ -541,6 +503,14 @@ pub fn map_view_dimension(dim: wgt::TextureViewDimension) -> vk::ImageViewType { } } +pub fn map_copy_extent(extent: &crate::CopyExtent) -> vk::Extent3D { + vk::Extent3D { + width: extent.width, + height: extent.height, + depth: extent.depth, + } +} + pub fn map_subresource_range( range: &wgt::ImageSubresourceRange, texture_aspect: crate::FormatAspects, @@ -560,16 +530,18 @@ pub fn map_subresource_range( pub fn map_subresource_layers( base: &crate::TextureCopyBase, - texture_dim: wgt::TextureDimension, texture_aspect: crate::FormatAspects, - layer_count: u32, ) -> (vk::ImageSubresourceLayers, vk::Offset3D) { - let (base_array_layer, offset) = map_origin(base.origin, texture_dim); + let offset = vk::Offset3D { + x: base.origin.x as i32, + y: base.origin.y as i32, + z: base.origin.z as i32, + }; let subresource = vk::ImageSubresourceLayers { aspect_mask: map_aspects(base.aspect & texture_aspect), mip_level: base.mip_level, - base_array_layer, - layer_count, + base_array_layer: base.array_layer, + layer_count: 1, }; (subresource, offset) } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a1a34a2555..152c717293 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -74,7 +74,7 @@ impl 
super::DeviceShared { for cat in e.key().colors.iter() { color_refs.push(vk::AttachmentReference { attachment: vk_attachments.len() as u32, - layout: cat.base.layout_in, + layout: cat.base.layout, }); vk_attachments.push({ let (load_op, store_op) = conv::map_attachment_ops(cat.base.ops); @@ -83,14 +83,14 @@ impl super::DeviceShared { .samples(samples) .load_op(load_op) .store_op(store_op) - .initial_layout(cat.base.layout_pre) - .final_layout(cat.base.layout_post) + .initial_layout(cat.base.layout) + .final_layout(cat.base.layout) .build() }); let at_ref = if let Some(ref rat) = cat.resolve { let at_ref = vk::AttachmentReference { attachment: vk_attachments.len() as u32, - layout: rat.layout_in, + layout: rat.layout, }; let (load_op, store_op) = conv::map_attachment_ops(rat.ops); let vk_attachment = vk::AttachmentDescription::builder() @@ -98,8 +98,8 @@ impl super::DeviceShared { .samples(vk::SampleCountFlags::TYPE_1) .load_op(load_op) .store_op(store_op) - .initial_layout(rat.layout_pre) - .final_layout(rat.layout_post) + .initial_layout(rat.layout) + .final_layout(rat.layout) .build(); vk_attachments.push(vk_attachment); at_ref @@ -115,7 +115,7 @@ impl super::DeviceShared { if let Some(ref ds) = e.key().depth_stencil { ds_ref = Some(vk::AttachmentReference { attachment: vk_attachments.len() as u32, - layout: ds.base.layout_in, + layout: ds.base.layout, }); let (load_op, store_op) = conv::map_attachment_ops(ds.base.ops); let (stencil_load_op, stencil_store_op) = @@ -127,8 +127,8 @@ impl super::DeviceShared { .store_op(store_op) .stencil_load_op(stencil_load_op) .stencil_store_op(stencil_store_op) - .initial_layout(ds.base.layout_pre) - .final_layout(ds.base.layout_post) + .initial_layout(ds.base.layout) + .final_layout(ds.base.layout) .build(); vk_attachments.push(vk_attachment); } @@ -654,7 +654,11 @@ impl crate::Device for super::Device { &self, desc: &crate::TextureDescriptor, ) -> Result { - let (array_layer_count, vk_extent) = conv::map_extent(desc.size, desc.dimension); + let (depth, array_layer_count) = match desc.dimension { + wgt::TextureDimension::D3 => (desc.size.depth_or_array_layers, 1), + _ => (1, desc.size.depth_or_array_layers), + }; + let mut raw_flags = vk::ImageCreateFlags::empty(); if desc.dimension == wgt::TextureDimension::D2 && desc.size.depth_or_array_layers % 6 == 0 { raw_flags |= vk::ImageCreateFlags::CUBE_COMPATIBLE; @@ -664,7 +668,11 @@ impl crate::Device for super::Device { .flags(raw_flags) .image_type(conv::map_texture_dimension(desc.dimension)) .format(self.shared.private_caps.map_texture_format(desc.format)) - .extent(vk_extent) + .extent(vk::Extent3D { + width: desc.size.width, + height: desc.size.height, + depth, + }) .mip_levels(desc.mip_level_count) .array_layers(array_layer_count) .samples(vk::SampleCountFlags::from_raw(desc.sample_count)) @@ -699,7 +707,6 @@ impl crate::Device for super::Device { raw, block: Some(block), usage: desc.usage, - dim: desc.dimension, aspects: crate::FormatAspects::from(desc.format), format_info: desc.format.describe(), raw_flags, @@ -725,7 +732,7 @@ impl crate::Device for super::Device { .subresource_range(conv::map_subresource_range(&desc.range, texture.aspects)); let mut image_view_info; - if self.shared.private_caps.image_view_usage { + if self.shared.private_caps.image_view_usage && !desc.usage.is_empty() { image_view_info = vk::ImageViewUsageCreateInfo::builder() .usage(conv::map_texture_usage(desc.usage)) .build(); diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 
d19a946772..39261edbed 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -23,7 +23,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( return vk::FALSE; } - let message_severity = match message_severity { + let level = match message_severity { vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => log::Level::Error, vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => log::Level::Warn, vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info, @@ -45,7 +45,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( }; log::log!( - message_severity, + level, "{:?} [{} (0x{:x})]\n\t{}", message_type, message_id_name, @@ -64,7 +64,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( .map(|lbl| CStr::from_ptr(lbl).to_string_lossy()) }) .collect::>(); - log::log!(message_severity, "\tqueues: {}", names.join(", ")); + log::log!(level, "\tqueues: {}", names.join(", ")); } if cd.cmd_buf_label_count != 0 { @@ -78,7 +78,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( .map(|lbl| CStr::from_ptr(lbl).to_string_lossy()) }) .collect::>(); - log::log!(message_severity, "\tcommand buffers: {}", names.join(", ")); + log::log!(level, "\tcommand buffers: {}", names.join(", ")); } if cd.object_count != 0 { @@ -99,7 +99,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( ) }) .collect::>(); - log::log!(message_severity, "\tobjects: {}", names.join(", ")); + log::log!(level, "\tobjects: {}", names.join(", ")); } vk::FALSE @@ -647,7 +647,6 @@ impl crate::Surface for super::Surface { raw: sc.images[index as usize], block: None, usage: sc.config.usage, - dim: wgt::TextureDimension::D2, aspects: crate::FormatAspects::COLOR, format_info: sc.config.format.describe(), raw_flags: vk::ImageCreateFlags::empty(), diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 4b5e17f97c..e29c37f83b 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -162,20 +162,16 @@ struct PrivateCapabilities { #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct AttachmentKey { format: vk::Format, - layout_pre: vk::ImageLayout, - layout_in: vk::ImageLayout, - layout_post: vk::ImageLayout, + layout: vk::ImageLayout, ops: crate::AttachmentOps, } impl AttachmentKey { /// Returns an attachment key for a compatible attachment. - fn compatible(format: vk::Format, layout_in: vk::ImageLayout) -> Self { + fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self { Self { format, - layout_pre: vk::ImageLayout::GENERAL, - layout_in, - layout_post: vk::ImageLayout::GENERAL, + layout, ops: crate::AttachmentOps::all(), } } @@ -255,7 +251,6 @@ pub struct Texture { raw: vk::Image, block: Option>, usage: crate::TextureUses, - dim: wgt::TextureDimension, aspects: crate::FormatAspects, format_info: wgt::TextureFormatInfo, raw_flags: vk::ImageCreateFlags, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 51ed60c4b4..f188fc820b 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -110,7 +110,7 @@ bitflags::bitflags! { /// Vulkan + Metal + DX12 + Browser WebGPU const PRIMARY = Self::VULKAN.bits | Self::METAL.bits - | Self::DX12.bits + //| Self::DX12.bits // enable when Naga is polished | Self::BROWSER_WEBGPU.bits; /// All the apis that wgpu offers second tier of support for. These may /// be unsupported/still experimental. @@ -204,7 +204,7 @@ bitflags::bitflags! { /// /// Supported Platforms: /// - Vulkan (works) - /// - DX12 (future) + /// - DX12 (works) /// /// This is a web and native feature. 
         const TIMESTAMP_QUERY = 0x0000_0000_0000_0004;
@@ -219,7 +219,7 @@ bitflags::bitflags! {
         ///
         /// Supported Platforms:
         /// - Vulkan (works)
-        /// - DX12 (future)
+        /// - DX12 (works)
         ///
         /// This is a web and native feature.
         const PIPELINE_STATISTICS_QUERY = 0x0000_0000_0000_0008;
diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml
index e521508b49..b2252bccd6 100644
--- a/wgpu/Cargo.toml
+++ b/wgpu/Cargo.toml
@@ -73,19 +73,19 @@ env_logger = "0.8"
 
 [dependencies.naga]
 git = "https://github.com/gfx-rs/naga"
-rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6"
+rev = "458db0b"
 optional = true
 
 # used to test all the example shaders
 [dev-dependencies.naga]
 git = "https://github.com/gfx-rs/naga"
-rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6"
+rev = "458db0b"
 features = ["wgsl-in"]
 
 # used to generate SPIR-V for the Web target
 [target.'cfg(target_arch = "wasm32")'.dependencies.naga]
git = "https://github.com/gfx-rs/naga"
-rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6"
+rev = "458db0b"
 features = ["wgsl-in", "spv-out"]
 
 [[example]]
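One consequence of the `set_blend_constants` signature change that runs through the backends above (Empty, GLES, Metal, Vulkan): the conversion from the double-precision `wgt::Color` to `[f32; 4]` now happens once in the caller instead of in every backend. A minimal self-contained sketch of that conversion; the `Color` stand-in and the `color_to_array` helper name are ours, not part of this diff:

```rust
// Stand-in for wgt::Color (which stores f64 channels) so the snippet compiles on its own.
#[derive(Clone, Copy, Debug)]
struct Color {
    r: f64,
    g: f64,
    b: f64,
    a: f64,
}

/// Hypothetical helper mirroring the per-backend conversion that this diff removes
/// from the GLES/Metal/Vulkan encoders.
fn color_to_array(c: Color) -> [f32; 4] {
    [c.r as f32, c.g as f32, c.b as f32, c.a as f32]
}

fn main() {
    let c = Color { r: 1.0, g: 0.5, b: 0.25, a: 1.0 };
    // The HAL call would then look like: encoder.set_blend_constants(&color_to_array(c));
    println!("{:?}", color_to_array(c));
}
```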