From 198762e5cc9a4b7048051318be110b7de95afbc3 Mon Sep 17 00:00:00 2001 From: Asher Jingkong Chen <37398747+AsherJingkongChen@users.noreply.github.com> Date: Fri, 10 Jan 2025 00:03:47 +0800 Subject: [PATCH] feat: Add 32-bit floating-point atomics (`SHADER_FLOAT32_ATOMIC`) (#6234) * feat: Add 32-bit floating-point atomics * Current supported platforms: Metal * Platforms to support in the future: Vulkan Related issues or PRs: * gfx-rs/wgpu#1020 * Add changelog * Edit changelog * feat: Add 32-bit float atomics support for Vulkan (SPIR-V shaders) * atomicSub for f32 in the previous commits is removed. * Update test * chore: doc type link * refactor: Revise float atomics on msl and spv * Make branches tidy * Also revise old codes * Ensure the implementations are supported by Metal and Vulkan backends * refactor: Renaming flt32 atomics to float32 atomics * chore: Add link to Vulkan feature * fix: cargo fmt * chore: hack comment * Revert changelog * Fix: Cargo advisory * Update wgpu-hal/src/metal/adapter.rs Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> * Update naga/src/lib.rs Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> * Adjust feature flag position --------- Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> --- CHANGELOG.md | 8 + naga/src/back/spv/block.rs | 192 ++++++++++++-------- naga/src/back/spv/writer.rs | 10 + naga/src/front/spv/mod.rs | 10 +- naga/src/lib.rs | 16 +- naga/src/valid/function.rs | 103 +++++++---- naga/src/valid/mod.rs | 51 +++--- naga/src/valid/type.rs | 44 +++-- naga/tests/in/atomicOps-float32.param.ron | 15 ++ naga/tests/in/atomicOps-float32.wgsl | 47 +++++ naga/tests/out/msl/atomicOps-float32.msl | 43 +++++ naga/tests/out/spv/atomicOps-float32.spvasm | 98 ++++++++++ naga/tests/out/wgsl/atomicOps-float32.wgsl | 35 ++++ naga/tests/snapshots.rs | 4 + tests/tests/shader/numeric_builtins.rs | 40 ++++ wgpu-core/src/device/mod.rs | 4 + wgpu-hal/src/metal/adapter.rs | 8 + wgpu-hal/src/metal/mod.rs | 1 + wgpu-hal/src/vulkan/adapter.rs | 39 ++++ wgpu-types/src/lib.rs | 13 +- 20 files changed, 633 insertions(+), 148 deletions(-) create mode 100644 naga/tests/in/atomicOps-float32.param.ron create mode 100644 naga/tests/in/atomicOps-float32.wgsl create mode 100644 naga/tests/out/msl/atomicOps-float32.msl create mode 100644 naga/tests/out/spv/atomicOps-float32.spvasm create mode 100644 naga/tests/out/wgsl/atomicOps-float32.wgsl diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fb25834a9..332cb1d133 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -163,6 +163,14 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - Allow for statically linking DXC rather than including separate `.dll` files. By @DouglasDwyer in [#6574](https://github.com/gfx-rs/wgpu/pull/6574). - `DeviceType` and `AdapterInfo` now impl `Hash` by @cwfitzgerald in [#6868](https://github.com/gfx-rs/wgpu/pull/6868) +##### Vulkan + +- Allow using some 32-bit floating-point atomic operations (load, store, add, sub, exchange) in shaders. It requires the extension `VK_EXT_shader_atomic_float`. By @AsherJingkongChen in [#6234](https://github.com/gfx-rs/wgpu/pull/6234). + +##### Metal + +- Allow using some 32-bit floating-point atomic operations (load, store, add, sub, exchange) in shaders. It requires Metal 3.0+ with Apple 7, 8, 9 or Mac 2. By @AsherJingkongChen in [#6234](https://github.com/gfx-rs/wgpu/pull/6234). + #### Changes ##### Naga diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 0fbba5c737..b595d7f2f1 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2730,62 +2730,115 @@ impl BlockContext<'_> { let value_id = self.cached[value]; let value_inner = self.fun_info[value].ty.inner_with(&self.ir_module.types); + let crate::TypeInner::Scalar(scalar) = *value_inner else { + return Err(Error::FeatureNotImplemented( + "Atomics with non-scalar values", + )); + }; + let instruction = match *fun { - crate::AtomicFunction::Add => Instruction::atomic_binary( - spirv::Op::AtomicIAdd, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::Subtract => Instruction::atomic_binary( - spirv::Op::AtomicISub, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::And => Instruction::atomic_binary( - spirv::Op::AtomicAnd, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::InclusiveOr => Instruction::atomic_binary( - spirv::Op::AtomicOr, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), - crate::AtomicFunction::ExclusiveOr => Instruction::atomic_binary( - spirv::Op::AtomicXor, - result_type_id, - id, - pointer_id, - scope_constant_id, - semantics_id, - value_id, - ), + crate::AtomicFunction::Add => { + let spirv_op = match scalar.kind { + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + spirv::Op::AtomicIAdd + } + crate::ScalarKind::Float => spirv::Op::AtomicFAddEXT, + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::Subtract => { + let (spirv_op, value_id) = match scalar.kind { + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + (spirv::Op::AtomicISub, value_id) + } + crate::ScalarKind::Float => { + // HACK: SPIR-V doesn't have a atomic subtraction, + // so we add the negated value instead. + let neg_result_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::FNegate, + result_type_id, + neg_result_id, + value_id, + )); + (spirv::Op::AtomicFAddEXT, neg_result_id) + } + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::And => { + let spirv_op = match scalar.kind { + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + spirv::Op::AtomicAnd + } + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::InclusiveOr => { + let spirv_op = match scalar.kind { + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + spirv::Op::AtomicOr + } + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } + crate::AtomicFunction::ExclusiveOr => { + let spirv_op = match scalar.kind { + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + spirv::Op::AtomicXor + } + _ => unimplemented!(), + }; + Instruction::atomic_binary( + spirv_op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + ) + } crate::AtomicFunction::Min => { - let spirv_op = match *value_inner { - crate::TypeInner::Scalar(crate::Scalar { - kind: crate::ScalarKind::Sint, - width: _, - }) => spirv::Op::AtomicSMin, - crate::TypeInner::Scalar(crate::Scalar { - kind: crate::ScalarKind::Uint, - width: _, - }) => spirv::Op::AtomicUMin, + let spirv_op = match scalar.kind { + crate::ScalarKind::Sint => spirv::Op::AtomicSMin, + crate::ScalarKind::Uint => spirv::Op::AtomicUMin, _ => unimplemented!(), }; Instruction::atomic_binary( @@ -2799,15 +2852,9 @@ impl BlockContext<'_> { ) } crate::AtomicFunction::Max => { - let spirv_op = match *value_inner { - crate::TypeInner::Scalar(crate::Scalar { - kind: crate::ScalarKind::Sint, - width: _, - }) => spirv::Op::AtomicSMax, - crate::TypeInner::Scalar(crate::Scalar { - kind: crate::ScalarKind::Uint, - width: _, - }) => spirv::Op::AtomicUMax, + let spirv_op = match scalar.kind { + crate::ScalarKind::Sint => spirv::Op::AtomicSMax, + crate::ScalarKind::Uint => spirv::Op::AtomicUMax, _ => unimplemented!(), }; Instruction::atomic_binary( @@ -2832,20 +2879,21 @@ impl BlockContext<'_> { ) } crate::AtomicFunction::Exchange { compare: Some(cmp) } => { - let scalar_type_id = match *value_inner { - crate::TypeInner::Scalar(scalar) => { - self.get_type_id(LookupType::Local(LocalType::Numeric( - NumericType::Scalar(scalar), - ))) - } - _ => unimplemented!(), - }; + let scalar_type_id = self.get_type_id(LookupType::Local( + LocalType::Numeric(NumericType::Scalar(scalar)), + )); let bool_type_id = self.get_type_id(LookupType::Local( LocalType::Numeric(NumericType::Scalar(crate::Scalar::BOOL)), )); let cas_result_id = self.gen_id(); let equality_result_id = self.gen_id(); + let equality_operator = match scalar.kind { + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + spirv::Op::IEqual + } + _ => unimplemented!(), + }; let mut cas_instr = Instruction::new(spirv::Op::AtomicCompareExchange); cas_instr.set_type(scalar_type_id); cas_instr.set_result(cas_result_id); @@ -2857,7 +2905,7 @@ impl BlockContext<'_> { cas_instr.add_operand(self.cached[cmp]); block.body.push(cas_instr); block.body.push(Instruction::binary( - spirv::Op::IEqual, + equality_operator, bool_type_id, equality_result_id, cas_result_id, diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 47f3ec513b..56e0029509 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -857,6 +857,16 @@ impl Writer { crate::TypeInner::Atomic(crate::Scalar { width: 8, kind: _ }) => { self.require_any("64 bit integer atomics", &[spirv::Capability::Int64Atomics])?; } + crate::TypeInner::Atomic(crate::Scalar { + width: 4, + kind: crate::ScalarKind::Float, + }) => { + self.require_any( + "32 bit floating-point atomics", + &[spirv::Capability::AtomicFloat32AddEXT], + )?; + self.use_extension("SPV_EXT_shader_atomic_float_add"); + } _ => {} } Ok(()) diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index 766ec0a8e7..c8288f5dde 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -67,6 +67,7 @@ pub const SUPPORTED_CAPABILITIES: &[spirv::Capability] = &[ spirv::Capability::Int64, spirv::Capability::Int64Atomics, spirv::Capability::Float16, + spirv::Capability::AtomicFloat32AddEXT, spirv::Capability::Float64, spirv::Capability::Geometry, spirv::Capability::MultiView, @@ -78,6 +79,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[ "SPV_KHR_storage_buffer_storage_class", "SPV_KHR_vulkan_memory_model", "SPV_KHR_multiview", + "SPV_EXT_shader_atomic_float_add", ]; pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"]; @@ -4339,7 +4341,8 @@ impl> Frontend { | Op::AtomicUMax | Op::AtomicAnd | Op::AtomicOr - | Op::AtomicXor => self.parse_atomic_expr_with_value( + | Op::AtomicXor + | Op::AtomicFAddEXT => self.parse_atomic_expr_with_value( inst, &mut emitter, ctx, @@ -4348,7 +4351,7 @@ impl> Frontend { body_idx, match inst.op { Op::AtomicExchange => crate::AtomicFunction::Exchange { compare: None }, - Op::AtomicIAdd => crate::AtomicFunction::Add, + Op::AtomicIAdd | Op::AtomicFAddEXT => crate::AtomicFunction::Add, Op::AtomicISub => crate::AtomicFunction::Subtract, Op::AtomicSMin => crate::AtomicFunction::Min, Op::AtomicUMin => crate::AtomicFunction::Min, @@ -4356,7 +4359,8 @@ impl> Frontend { Op::AtomicUMax => crate::AtomicFunction::Max, Op::AtomicAnd => crate::AtomicFunction::And, Op::AtomicOr => crate::AtomicFunction::InclusiveOr, - _ => crate::AtomicFunction::ExclusiveOr, + Op::AtomicXor => crate::AtomicFunction::ExclusiveOr, + _ => unreachable!(), }, )?, diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 8db5b676d6..d9873bfedd 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -1949,14 +1949,18 @@ pub enum Statement { /// If [`SHADER_INT64_ATOMIC_MIN_MAX`] or [`SHADER_INT64_ATOMIC_ALL_OPS`] are /// enabled, this may also be [`I64`] or [`U64`]. /// + /// If [`SHADER_FLOAT32_ATOMIC`] is enabled, this may be [`F32`]. + /// /// [`Pointer`]: TypeInner::Pointer /// [`Atomic`]: TypeInner::Atomic /// [`I32`]: Scalar::I32 /// [`U32`]: Scalar::U32 /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`SHADER_FLOAT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLOAT32_ATOMIC /// [`I64`]: Scalar::I64 /// [`U64`]: Scalar::U64 + /// [`F32`]: Scalar::F32 pointer: Handle, /// Function to run on the atomic value. @@ -1967,14 +1971,24 @@ pub enum Statement { /// value here. /// /// - The [`SHADER_INT64_ATOMIC_MIN_MAX`] capability allows - /// [`AtomicFunction::Min`] and [`AtomicFunction::Max`] here. + /// [`AtomicFunction::Min`] and [`AtomicFunction::Max`] + /// in the [`Storage`] address space here. /// /// - If neither of those capabilities are present, then 64-bit scalar /// atomics are not allowed. /// + /// If [`pointer`] refers to a 32-bit floating-point atomic value, then: + /// + /// - The [`SHADER_FLOAT32_ATOMIC`] capability allows [`AtomicFunction::Add`], + /// [`AtomicFunction::Subtract`], and [`AtomicFunction::Exchange { compare: None }`] + /// in the [`Storage`] address space here. + /// + /// [`AtomicFunction::Exchange { compare: None }`]: AtomicFunction::Exchange /// [`pointer`]: Statement::Atomic::pointer + /// [`Storage`]: AddressSpace::Storage /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`SHADER_FLOAT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLOAT32_ATOMIC fun: AtomicFunction, /// Value to use in the function. diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index 40160ce6e8..b7b96a6c7d 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -41,10 +41,12 @@ pub enum CallError { pub enum AtomicError { #[error("Pointer {0:?} to atomic is invalid.")] InvalidPointer(Handle), - #[error("Address space {0:?} does not support 64bit atomics.")] + #[error("Address space {0:?} is not supported.")] InvalidAddressSpace(crate::AddressSpace), #[error("Operand {0:?} has invalid type.")] InvalidOperand(Handle), + #[error("Operator {0:?} is not supported.")] + InvalidOperator(crate::AtomicFunction), #[error("Result expression {0:?} is not an `AtomicResult` expression")] InvalidResultExpression(Handle), #[error("Result expression {0:?} is marked as an `exchange`")] @@ -401,49 +403,88 @@ impl super::Validator { .into_other()); } - // Check for the special restrictions on 64-bit atomic operations. - // - // We don't need to consider other widths here: this function has already checked - // that `pointer`'s type is an `Atomic`, and `validate_type` has already checked - // that that `Atomic` type has a permitted scalar width. - if pointer_scalar.width == 8 { - // `Capabilities::SHADER_INT64_ATOMIC_ALL_OPS` enables all sorts of 64-bit - // atomic operations. - if self - .capabilities - .contains(super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS) - { - // okay - } else { - // `Capabilities::SHADER_INT64_ATOMIC_MIN_MAX` allows `Min` and - // `Max` on operations in `Storage`, without a return value. - if matches!( - *fun, - crate::AtomicFunction::Min | crate::AtomicFunction::Max - ) && matches!(pointer_space, crate::AddressSpace::Storage { .. }) - && result.is_none() + match pointer_scalar { + // Check for the special restrictions on 64-bit atomic operations. + // + // We don't need to consider other widths here: this function has already checked + // that `pointer`'s type is an `Atomic`, and `validate_type` has already checked + // that `Atomic` type has a permitted scalar width. + crate::Scalar::I64 | crate::Scalar::U64 => { + // `Capabilities::SHADER_INT64_ATOMIC_ALL_OPS` enables all sorts of 64-bit + // atomic operations. + if self + .capabilities + .contains(super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS) { - if !self - .capabilities - .contains(super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX) + // okay + } else { + // `Capabilities::SHADER_INT64_ATOMIC_MIN_MAX` allows `Min` and + // `Max` on operations in `Storage`, without a return value. + if matches!( + *fun, + crate::AtomicFunction::Min | crate::AtomicFunction::Max + ) && matches!(pointer_space, crate::AddressSpace::Storage { .. }) + && result.is_none() { - log::error!("Int64 min-max atomic operations are not supported"); + if !self + .capabilities + .contains(super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX) + { + log::error!("Int64 min-max atomic operations are not supported"); + return Err(AtomicError::MissingCapability( + super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, + ) + .with_span_handle(value, context.expressions) + .into_other()); + } + } else { + // Otherwise, we require the full 64-bit atomic capability. + log::error!("Int64 atomic operations are not supported"); return Err(AtomicError::MissingCapability( - super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, + super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, ) .with_span_handle(value, context.expressions) .into_other()); } - } else { - // Otherwise, we require the full 64-bit atomic capability. - log::error!("Int64 atomic operations are not supported"); + } + } + // Check for the special restrictions on 32-bit floating-point atomic operations. + crate::Scalar::F32 => { + // `Capabilities::SHADER_FLOAT32_ATOMIC` allows 32-bit floating-point + // atomic operations `Add`, `Subtract`, and `Exchange` + // in the `Storage` address space. + if !self + .capabilities + .contains(super::Capabilities::SHADER_FLOAT32_ATOMIC) + { + log::error!("Float32 atomic operations are not supported"); return Err(AtomicError::MissingCapability( - super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, + super::Capabilities::SHADER_FLOAT32_ATOMIC, ) .with_span_handle(value, context.expressions) .into_other()); } + if !matches!( + *fun, + crate::AtomicFunction::Add + | crate::AtomicFunction::Subtract + | crate::AtomicFunction::Exchange { compare: None } + ) { + log::error!("Float32 atomic operation {:?} is not supported", fun); + return Err(AtomicError::InvalidOperator(*fun) + .with_span_handle(value, context.expressions) + .into_other()); + } + if !matches!(pointer_space, crate::AddressSpace::Storage { .. }) { + log::error!( + "Float32 atomic operations are only supported in the Storage address space" + ); + return Err(AtomicError::InvalidAddressSpace(pointer_space) + .with_span_handle(value, context.expressions) + .into_other()); + } } + _ => {} } // The result expression must be appropriate to the operation. diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index 6a81bd7c2d..828c784a7a 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -81,56 +81,56 @@ bitflags::bitflags! { /// Support for [`AddressSpace::PushConstant`][1]. /// /// [1]: crate::AddressSpace::PushConstant - const PUSH_CONSTANT = 0x1; + const PUSH_CONSTANT = 1 << 0; /// Float values with width = 8. - const FLOAT64 = 0x2; + const FLOAT64 = 1 << 1; /// Support for [`BuiltIn::PrimitiveIndex`][1]. /// /// [1]: crate::BuiltIn::PrimitiveIndex - const PRIMITIVE_INDEX = 0x4; + const PRIMITIVE_INDEX = 1 << 2; /// Support for non-uniform indexing of sampled textures and storage buffer arrays. - const SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING = 0x8; + const SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING = 1 << 3; /// Support for non-uniform indexing of uniform buffers and storage texture arrays. - const UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING = 0x10; + const UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING = 1 << 4; /// Support for non-uniform indexing of samplers. - const SAMPLER_NON_UNIFORM_INDEXING = 0x20; + const SAMPLER_NON_UNIFORM_INDEXING = 1 << 5; /// Support for [`BuiltIn::ClipDistance`]. /// /// [`BuiltIn::ClipDistance`]: crate::BuiltIn::ClipDistance - const CLIP_DISTANCE = 0x40; + const CLIP_DISTANCE = 1 << 6; /// Support for [`BuiltIn::CullDistance`]. /// /// [`BuiltIn::CullDistance`]: crate::BuiltIn::CullDistance - const CULL_DISTANCE = 0x80; + const CULL_DISTANCE = 1 << 7; /// Support for 16-bit normalized storage texture formats. - const STORAGE_TEXTURE_16BIT_NORM_FORMATS = 0x100; + const STORAGE_TEXTURE_16BIT_NORM_FORMATS = 1 << 8; /// Support for [`BuiltIn::ViewIndex`]. /// /// [`BuiltIn::ViewIndex`]: crate::BuiltIn::ViewIndex - const MULTIVIEW = 0x200; + const MULTIVIEW = 1 << 9; /// Support for `early_depth_test`. - const EARLY_DEPTH_TEST = 0x400; + const EARLY_DEPTH_TEST = 1 << 10; /// Support for [`BuiltIn::SampleIndex`] and [`Sampling::Sample`]. /// /// [`BuiltIn::SampleIndex`]: crate::BuiltIn::SampleIndex /// [`Sampling::Sample`]: crate::Sampling::Sample - const MULTISAMPLED_SHADING = 0x800; + const MULTISAMPLED_SHADING = 1 << 11; /// Support for ray queries and acceleration structures. - const RAY_QUERY = 0x1000; + const RAY_QUERY = 1 << 12; /// Support for generating two sources for blending from fragment shaders. - const DUAL_SOURCE_BLENDING = 0x2000; + const DUAL_SOURCE_BLENDING = 1 << 13; /// Support for arrayed cube textures. - const CUBE_ARRAY_TEXTURES = 0x4000; + const CUBE_ARRAY_TEXTURES = 1 << 14; /// Support for 64-bit signed and unsigned integers. - const SHADER_INT64 = 0x8000; + const SHADER_INT64 = 1 << 15; /// Support for subgroup operations. /// Implies support for subgroup operations in both fragment and compute stages, /// but not necessarily in the vertex stage, which requires [`Capabilities::SUBGROUP_VERTEX_STAGE`]. - const SUBGROUP = 0x10000; + const SUBGROUP = 1 << 16; /// Support for subgroup barriers. - const SUBGROUP_BARRIER = 0x20000; + const SUBGROUP_BARRIER = 1 << 17; /// Support for subgroup operations in the vertex stage. - const SUBGROUP_VERTEX_STAGE = 0x40000; + const SUBGROUP_VERTEX_STAGE = 1 << 18; /// Support for [`AtomicFunction::Min`] and [`AtomicFunction::Max`] on /// 64-bit integers in the [`Storage`] address space, when the return /// value is not used. @@ -140,9 +140,18 @@ bitflags::bitflags! { /// [`AtomicFunction::Min`]: crate::AtomicFunction::Min /// [`AtomicFunction::Max`]: crate::AtomicFunction::Max /// [`Storage`]: crate::AddressSpace::Storage - const SHADER_INT64_ATOMIC_MIN_MAX = 0x80000; + const SHADER_INT64_ATOMIC_MIN_MAX = 1 << 19; /// Support for all atomic operations on 64-bit integers. - const SHADER_INT64_ATOMIC_ALL_OPS = 0x100000; + const SHADER_INT64_ATOMIC_ALL_OPS = 1 << 20; + /// Support for [`AtomicFunction::Add`], [`AtomicFunction::Sub`], + /// and [`AtomicFunction::Exchange { compare: None }`] on 32-bit floating-point numbers + /// in the [`Storage`] address space. + /// + /// [`AtomicFunction::Add`]: crate::AtomicFunction::Add + /// [`AtomicFunction::Sub`]: crate::AtomicFunction::Sub + /// [`AtomicFunction::Exchange { compare: None }`]: crate::AtomicFunction::Exchange + /// [`Storage`]: crate::AddressSpace::Storage + const SHADER_FLOAT32_ATOMIC = 1 << 21; } } diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 35158b8013..8c6825b842 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -370,35 +370,41 @@ impl super::Validator { Alignment::from(rows) * Alignment::from_width(scalar.width), ) } - Ti::Atomic(crate::Scalar { kind, width }) => { - match kind { - crate::ScalarKind::Bool - | crate::ScalarKind::Float - | crate::ScalarKind::AbstractInt - | crate::ScalarKind::AbstractFloat => { - return Err(TypeError::InvalidAtomicWidth(kind, width)) - } - crate::ScalarKind::Sint | crate::ScalarKind::Uint => { - if width == 8 { - if !self.capabilities.intersects( + Ti::Atomic(scalar) => { + match scalar { + crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + } => { + if scalar.width == 8 + && !self.capabilities.intersects( Capabilities::SHADER_INT64_ATOMIC_ALL_OPS | Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, - ) { - return Err(TypeError::MissingCapability( - Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, - )); - } - } else if width != 4 { - return Err(TypeError::InvalidAtomicWidth(kind, width)); + ) + { + return Err(TypeError::MissingCapability( + Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, + )); } } + crate::Scalar::F32 => { + if !self + .capabilities + .contains(Capabilities::SHADER_FLOAT32_ATOMIC) + { + return Err(TypeError::MissingCapability( + Capabilities::SHADER_FLOAT32_ATOMIC, + )); + } + } + _ => return Err(TypeError::InvalidAtomicWidth(scalar.kind, scalar.width)), }; TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::HOST_SHAREABLE | TypeFlags::CREATION_RESOLVED, - Alignment::from_width(width), + Alignment::from_width(scalar.width), ) } Ti::Pointer { base, space } => { diff --git a/naga/tests/in/atomicOps-float32.param.ron b/naga/tests/in/atomicOps-float32.param.ron new file mode 100644 index 0000000000..13919f13ef --- /dev/null +++ b/naga/tests/in/atomicOps-float32.param.ron @@ -0,0 +1,15 @@ +( + god_mode: true, + spv: ( + version: (1, 1), + capabilities: [ AtomicFloat32AddEXT ], + ), + msl: ( + lang_version: (3, 0), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: false, + ), +) diff --git a/naga/tests/in/atomicOps-float32.wgsl b/naga/tests/in/atomicOps-float32.wgsl new file mode 100644 index 0000000000..9c73df0299 --- /dev/null +++ b/naga/tests/in/atomicOps-float32.wgsl @@ -0,0 +1,47 @@ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicStore(&storage_atomic_scalar, 1.5); + atomicStore(&storage_atomic_arr[1], 1.5); + atomicStore(&storage_struct.atomic_scalar, 1.5); + atomicStore(&storage_struct.atomic_arr[1], 1.5); + + workgroupBarrier(); + + let l0 = atomicLoad(&storage_atomic_scalar); + let l1 = atomicLoad(&storage_atomic_arr[1]); + let l2 = atomicLoad(&storage_struct.atomic_scalar); + let l3 = atomicLoad(&storage_struct.atomic_arr[1]); + + workgroupBarrier(); + + atomicAdd(&storage_atomic_scalar, 1.5); + atomicAdd(&storage_atomic_arr[1], 1.5); + atomicAdd(&storage_struct.atomic_scalar, 1.5); + atomicAdd(&storage_struct.atomic_arr[1], 1.5); + + workgroupBarrier(); + + atomicExchange(&storage_atomic_scalar, 1.5); + atomicExchange(&storage_atomic_arr[1], 1.5); + atomicExchange(&storage_struct.atomic_scalar, 1.5); + atomicExchange(&storage_struct.atomic_arr[1], 1.5); + + // // TODO: https://github.com/gpuweb/gpuweb/issues/2021 + // atomicCompareExchangeWeak(&storage_atomic_scalar, 1.5); + // atomicCompareExchangeWeak(&storage_atomic_arr[1], 1.5); + // atomicCompareExchangeWeak(&storage_struct.atomic_scalar, 1.5); + // atomicCompareExchangeWeak(&storage_struct.atomic_arr[1], 1.5); +} diff --git a/naga/tests/out/msl/atomicOps-float32.msl b/naga/tests/out/msl/atomicOps-float32.msl new file mode 100644 index 0000000000..40b5a7a14f --- /dev/null +++ b/naga/tests/out/msl/atomicOps-float32.msl @@ -0,0 +1,43 @@ +// language: metal3.0 +#include +#include + +using metal::uint; + +struct type_1 { + metal::atomic_float inner[2]; +}; +struct Struct { + metal::atomic_float atomic_scalar; + type_1 atomic_arr; +}; + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, device metal::atomic_float& storage_atomic_scalar [[user(fake0)]] +, device type_1& storage_atomic_arr [[user(fake0)]] +, device Struct& storage_struct [[user(fake0)]] +) { + metal::atomic_store_explicit(&storage_atomic_scalar, 1.5, metal::memory_order_relaxed); + metal::atomic_store_explicit(&storage_atomic_arr.inner[1], 1.5, metal::memory_order_relaxed); + metal::atomic_store_explicit(&storage_struct.atomic_scalar, 1.5, metal::memory_order_relaxed); + metal::atomic_store_explicit(&storage_struct.atomic_arr.inner[1], 1.5, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float l0_ = metal::atomic_load_explicit(&storage_atomic_scalar, metal::memory_order_relaxed); + float l1_ = metal::atomic_load_explicit(&storage_atomic_arr.inner[1], metal::memory_order_relaxed); + float l2_ = metal::atomic_load_explicit(&storage_struct.atomic_scalar, metal::memory_order_relaxed); + float l3_ = metal::atomic_load_explicit(&storage_struct.atomic_arr.inner[1], metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float _e27 = metal::atomic_fetch_add_explicit(&storage_atomic_scalar, 1.5, metal::memory_order_relaxed); + float _e31 = metal::atomic_fetch_add_explicit(&storage_atomic_arr.inner[1], 1.5, metal::memory_order_relaxed); + float _e35 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_scalar, 1.5, metal::memory_order_relaxed); + float _e40 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_arr.inner[1], 1.5, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float _e43 = metal::atomic_exchange_explicit(&storage_atomic_scalar, 1.5, metal::memory_order_relaxed); + float _e47 = metal::atomic_exchange_explicit(&storage_atomic_arr.inner[1], 1.5, metal::memory_order_relaxed); + float _e51 = metal::atomic_exchange_explicit(&storage_struct.atomic_scalar, 1.5, metal::memory_order_relaxed); + float _e56 = metal::atomic_exchange_explicit(&storage_struct.atomic_arr.inner[1], 1.5, metal::memory_order_relaxed); + return; +} diff --git a/naga/tests/out/spv/atomicOps-float32.spvasm b/naga/tests/out/spv/atomicOps-float32.spvasm new file mode 100644 index 0000000000..135274b0d8 --- /dev/null +++ b/naga/tests/out/spv/atomicOps-float32.spvasm @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 62 +OpCapability Shader +OpCapability AtomicFloat32AddEXT +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpExtension "SPV_EXT_shader_atomic_float_add" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %22 "cs_main" %19 +OpExecutionMode %22 LocalSize 2 1 1 +OpDecorate %4 ArrayStride 4 +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 4 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 1 +OpDecorate %13 Block +OpMemberDecorate %13 0 Offset 0 +OpDecorate %15 DescriptorSet 0 +OpDecorate %15 Binding 2 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +OpDecorate %19 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeFloat 32 +%6 = OpTypeInt 32 0 +%5 = OpConstant %6 2 +%4 = OpTypeArray %3 %5 +%7 = OpTypeStruct %3 %4 +%8 = OpTypeVector %6 3 +%10 = OpTypeStruct %3 +%11 = OpTypePointer StorageBuffer %10 +%9 = OpVariable %11 StorageBuffer +%13 = OpTypeStruct %4 +%14 = OpTypePointer StorageBuffer %13 +%12 = OpVariable %14 StorageBuffer +%16 = OpTypeStruct %7 +%17 = OpTypePointer StorageBuffer %16 +%15 = OpVariable %17 StorageBuffer +%20 = OpTypePointer Input %8 +%19 = OpVariable %20 Input +%23 = OpTypeFunction %2 +%24 = OpTypePointer StorageBuffer %3 +%25 = OpConstant %6 0 +%27 = OpTypePointer StorageBuffer %4 +%29 = OpTypePointer StorageBuffer %7 +%31 = OpConstant %3 1.5 +%34 = OpTypeInt 32 1 +%33 = OpConstant %34 1 +%35 = OpConstant %6 64 +%36 = OpConstant %6 1 +%40 = OpConstant %6 264 +%22 = OpFunction %2 None %23 +%18 = OpLabel +%21 = OpLoad %8 %19 +%26 = OpAccessChain %24 %9 %25 +%28 = OpAccessChain %27 %12 %25 +%30 = OpAccessChain %29 %15 %25 +OpBranch %32 +%32 = OpLabel +OpAtomicStore %26 %33 %35 %31 +%37 = OpAccessChain %24 %28 %36 +OpAtomicStore %37 %33 %35 %31 +%38 = OpAccessChain %24 %30 %25 +OpAtomicStore %38 %33 %35 %31 +%39 = OpAccessChain %24 %30 %36 %36 +OpAtomicStore %39 %33 %35 %31 +OpControlBarrier %5 %5 %40 +%41 = OpAtomicLoad %3 %26 %33 %35 +%42 = OpAccessChain %24 %28 %36 +%43 = OpAtomicLoad %3 %42 %33 %35 +%44 = OpAccessChain %24 %30 %25 +%45 = OpAtomicLoad %3 %44 %33 %35 +%46 = OpAccessChain %24 %30 %36 %36 +%47 = OpAtomicLoad %3 %46 %33 %35 +OpControlBarrier %5 %5 %40 +%48 = OpAtomicFAddEXT %3 %26 %33 %35 %31 +%50 = OpAccessChain %24 %28 %36 +%49 = OpAtomicFAddEXT %3 %50 %33 %35 %31 +%52 = OpAccessChain %24 %30 %25 +%51 = OpAtomicFAddEXT %3 %52 %33 %35 %31 +%54 = OpAccessChain %24 %30 %36 %36 +%53 = OpAtomicFAddEXT %3 %54 %33 %35 %31 +OpControlBarrier %5 %5 %40 +%55 = OpAtomicExchange %3 %26 %33 %35 %31 +%57 = OpAccessChain %24 %28 %36 +%56 = OpAtomicExchange %3 %57 %33 %35 %31 +%59 = OpAccessChain %24 %30 %25 +%58 = OpAtomicExchange %3 %59 %33 %35 %31 +%61 = OpAccessChain %24 %30 %36 %36 +%60 = OpAtomicExchange %3 %61 %33 %35 %31 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/atomicOps-float32.wgsl b/naga/tests/out/wgsl/atomicOps-float32.wgsl new file mode 100644 index 0000000000..92e67b3428 --- /dev/null +++ b/naga/tests/out/wgsl/atomicOps-float32.wgsl @@ -0,0 +1,35 @@ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicStore((&storage_atomic_scalar), 1.5f); + atomicStore((&storage_atomic_arr[1]), 1.5f); + atomicStore((&storage_struct.atomic_scalar), 1.5f); + atomicStore((&storage_struct.atomic_arr[1]), 1.5f); + workgroupBarrier(); + let l0_ = atomicLoad((&storage_atomic_scalar)); + let l1_ = atomicLoad((&storage_atomic_arr[1])); + let l2_ = atomicLoad((&storage_struct.atomic_scalar)); + let l3_ = atomicLoad((&storage_struct.atomic_arr[1])); + workgroupBarrier(); + let _e27 = atomicAdd((&storage_atomic_scalar), 1.5f); + let _e31 = atomicAdd((&storage_atomic_arr[1]), 1.5f); + let _e35 = atomicAdd((&storage_struct.atomic_scalar), 1.5f); + let _e40 = atomicAdd((&storage_struct.atomic_arr[1]), 1.5f); + workgroupBarrier(); + let _e43 = atomicExchange((&storage_atomic_scalar), 1.5f); + let _e47 = atomicExchange((&storage_atomic_arr[1]), 1.5f); + let _e51 = atomicExchange((&storage_struct.atomic_scalar), 1.5f); + let _e56 = atomicExchange((&storage_struct.atomic_arr[1]), 1.5f); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 6a234a0977..b6ab1046a7 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -788,6 +788,10 @@ fn convert_wgsl() { "atomicOps-int64-min-max", Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, ), + ( + "atomicOps-float32", + Targets::SPIRV | Targets::METAL | Targets::WGSL, + ), ( "atomicCompareExchange-int64", Targets::SPIRV | Targets::WGSL, diff --git a/tests/tests/shader/numeric_builtins.rs b/tests/tests/shader/numeric_builtins.rs index f6cb0bb39f..db8461a3a0 100644 --- a/tests/tests/shader/numeric_builtins.rs +++ b/tests/tests/shader/numeric_builtins.rs @@ -151,6 +151,46 @@ static INT64_ATOMIC_ALL_OPS: GpuTestConfiguration = GpuTestConfiguration::new() ) }); +fn create_float32_atomic_test() -> Vec { + let mut tests = Vec::new(); + + let test = ShaderTest::new( + "atomicAdd".into(), + "value: f32".into(), + "atomicStore(&output, 0.0); atomicAdd(&output, -0.50); atomicAdd(&output, 1.75);".into(), + &[0_f32], + &[1.25_f32], + ) + .output_type("atomic".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicAdd".into(), + "value: f32".into(), + "atomicStore(&output, 0.0); atomicSub(&output, -2.5); atomicSub(&output, 3.0);".into(), + &[0_f32], + &[-0.5_f32], + ) + .output_type("atomic".into()); + + tests.push(test); + + tests +} + +#[gpu_test] +static FLOAT32_ATOMIC: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgt::Features::SHADER_FLOAT32_ATOMIC) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test(ctx, InputStorageType::Storage, create_float32_atomic_test()) + }); + // See https://github.com/gfx-rs/wgpu/issues/5276 /* fn create_int64_polyfill_test() -> Vec { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index b1b8c344bd..14ff1ec9b3 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -425,6 +425,10 @@ pub fn create_validator( Caps::SHADER_INT64_ATOMIC_ALL_OPS, features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS), ); + caps.set( + Caps::SHADER_FLOAT32_ATOMIC, + features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC), + ); caps.set( Caps::MULTISAMPLED_SHADING, downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 21d34001a8..c2a9541bee 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -838,6 +838,10 @@ impl super::PrivateCapabilities { && ((device.supports_family(MTLGPUFamily::Apple8) && device.supports_family(MTLGPUFamily::Mac2)) || device.supports_family(MTLGPUFamily::Apple9)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6 + float_atomics: family_check + && (device.supports_family(MTLGPUFamily::Apple7) + || device.supports_family(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), } } @@ -913,6 +917,10 @@ impl super::PrivateCapabilities { F::SHADER_INT64_ATOMIC_MIN_MAX, self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, ); + features.set( + F::SHADER_FLOAT32_ATOMIC, + self.float_atomics && self.msl_version >= MTLLanguageVersion::V3_0, + ); features.set( F::ADDRESS_MODE_CLAMP_TO_BORDER, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 448349e2b0..abc600922e 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -290,6 +290,7 @@ struct PrivateCapabilities { supports_simd_scoped_operations: bool, int64: bool, int64_atomics: bool, + float_atomics: bool, supports_shared_event: bool, } diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 7269bf3ecc..11dfb6651f 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -109,6 +109,9 @@ pub struct PhysicalDeviceFeatures { /// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2. shader_atomic_int64: Option>, + /// Features provided by `VK_EXT_shader_atomic_float`. + shader_atomic_float: Option>, + /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3. subgroup_size_control: Option>, } @@ -157,6 +160,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.shader_atomic_int64 { info = info.push_next(feature); } + if let Some(ref mut feature) = self.shader_atomic_float { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.subgroup_size_control { info = info.push_next(feature); } @@ -438,6 +444,16 @@ impl PhysicalDeviceFeatures { } else { None }, + shader_atomic_float: if enabled_extensions.contains(&ext::shader_atomic_float::NAME) { + let needed = requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC); + Some( + vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT::default() + .shader_buffer_float32_atomics(needed) + .shader_buffer_float32_atomic_add(needed), + ) + } else { + None + }, subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&ext::subgroup_size_control::NAME) { @@ -582,6 +598,14 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref shader_atomic_float) = self.shader_atomic_float { + features.set( + F::SHADER_FLOAT32_ATOMIC, + shader_atomic_float.shader_buffer_float32_atomics != 0 + && shader_atomic_float.shader_buffer_float32_atomic_add != 0, + ); + } + //if caps.supports_extension(khr::sampler_mirror_clamp_to_edge::NAME) { //if caps.supports_extension(ext::sampler_filter_minmax::NAME) { features.set( @@ -994,6 +1018,11 @@ impl PhysicalDeviceProperties { extensions.push(khr::shader_atomic_int64::NAME); } + // Require `VK_EXT_shader_atomic_float` if the associated feature was requested + if requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) { + extensions.push(ext::shader_atomic_float::NAME); + } + // Require VK_GOOGLE_display_timing if the associated feature was requested if requested_features.contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING) { extensions.push(google::display_timing::NAME); @@ -1289,6 +1318,12 @@ impl super::InstanceShared { features2 = features2.push_next(next); } + if capabilities.supports_extension(ext::shader_atomic_float::NAME) { + let next = features + .shader_atomic_float + .insert(vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT::default()); + features2 = features2.push_next(next); + } if capabilities.supports_extension(ext::image_robustness::NAME) { let next = features .image_robustness @@ -1784,6 +1819,10 @@ impl super::Adapter { capabilities.push(spv::Capability::Int64Atomics); } + if features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) { + capabilities.push(spv::Capability::AtomicFloat32AddEXT); + } + let mut flags = spv::WriterFlags::empty(); flags.set( spv::WriterFlags::DEBUG, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 7753de289a..0f7d9836a2 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -405,6 +405,17 @@ bitflags::bitflags! { // Native Features: // + /// Allows shaders to use f32 atomic load, store, add, sub, and exchange. + /// + /// Supported platforms: + /// - Metal (with MSL 3.0+ and Apple7+/Mac2) + /// - Vulkan (with [VK_EXT_shader_atomic_float]) + /// + /// This is a native only feature. + /// + /// [VK_EXT_shader_atomic_float]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_shader_atomic_float.html + const SHADER_FLOAT32_ATOMIC = 1 << 19; + // The features starting with a ? are features that might become part of the spec or // at the very least we can implement as native features; since they should cover all // possible formats and capabilities across backends. @@ -964,7 +975,7 @@ impl Features { /// Mask of all features which are part of the upstream WebGPU standard. #[must_use] pub const fn all_webgpu_mask() -> Self { - Self::from_bits_truncate(0xFFFFF) + Self::from_bits_truncate(0x7FFFF) } /// Mask of all features that are only available when targeting native (not web).