feat: Add 32-bit floating-point atomics (SHADER_FLOAT32_ATOMIC) (#6234)

* feat: Add 32-bit floating-point atomics

* Currently supported platforms: Metal
* Platforms to support in the future: Vulkan

Related issues or PRs:

* gfx-rs/wgpu#1020

* Add changelog

* Edit changelog

* feat: Add 32-bit float atomics support for Vulkan (SPIR-V shaders)

* The `atomicSub` implementation for f32 from the previous commits has been removed.

* Update test

* chore: doc type link

* refactor: Revise float atomics on msl and spv

* Make branches tidy
* Also revise old code
* Ensure the implementations are supported by Metal and Vulkan backends

* refactor: Renaming flt32 atomics to float32 atomics

* chore: Add link to Vulkan feature

* fix: cargo fmt

* chore: hack comment

* Revert changelog

* Fix: Cargo advisory

* Update wgpu-hal/src/metal/adapter.rs

Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>

* Update naga/src/lib.rs

Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>

* Adjust feature flag position

---------

Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
This commit is contained in:
Asher Jingkong Chen
2025-01-10 00:03:47 +08:00
committed by GitHub
parent 6e2394b95e
commit 198762e5cc
20 changed files with 633 additions and 148 deletions

View File

@@ -163,6 +163,14 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849).
- Allow for statically linking DXC rather than including separate `.dll` files. By @DouglasDwyer in [#6574](https://github.com/gfx-rs/wgpu/pull/6574).
- `DeviceType` and `AdapterInfo` now impl `Hash` by @cwfitzgerald in [#6868](https://github.com/gfx-rs/wgpu/pull/6868)
##### Vulkan
- Allow using some 32-bit floating-point atomic operations (load, store, add, sub, exchange) in shaders. It requires the extension `VK_EXT_shader_atomic_float`. By @AsherJingkongChen in [#6234](https://github.com/gfx-rs/wgpu/pull/6234).
##### Metal
- Allow using some 32-bit floating-point atomic operations (load, store, add, sub, exchange) in shaders. It requires Metal 3.0+ with Apple 7, 8, 9 or Mac 2. By @AsherJingkongChen in [#6234](https://github.com/gfx-rs/wgpu/pull/6234).
#### Changes
##### Naga

View File

@@ -2730,62 +2730,115 @@ impl BlockContext<'_> {
let value_id = self.cached[value];
let value_inner = self.fun_info[value].ty.inner_with(&self.ir_module.types);
let crate::TypeInner::Scalar(scalar) = *value_inner else {
return Err(Error::FeatureNotImplemented(
"Atomics with non-scalar values",
));
};
let instruction = match *fun {
crate::AtomicFunction::Add => Instruction::atomic_binary(
spirv::Op::AtomicIAdd,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
),
crate::AtomicFunction::Subtract => Instruction::atomic_binary(
spirv::Op::AtomicISub,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
),
crate::AtomicFunction::And => Instruction::atomic_binary(
spirv::Op::AtomicAnd,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
),
crate::AtomicFunction::InclusiveOr => Instruction::atomic_binary(
spirv::Op::AtomicOr,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
),
crate::AtomicFunction::ExclusiveOr => Instruction::atomic_binary(
spirv::Op::AtomicXor,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
),
crate::AtomicFunction::Add => {
let spirv_op = match scalar.kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
spirv::Op::AtomicIAdd
}
crate::ScalarKind::Float => spirv::Op::AtomicFAddEXT,
_ => unimplemented!(),
};
Instruction::atomic_binary(
spirv_op,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
)
}
crate::AtomicFunction::Subtract => {
let (spirv_op, value_id) = match scalar.kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
(spirv::Op::AtomicISub, value_id)
}
crate::ScalarKind::Float => {
// HACK: SPIR-V doesn't have an atomic subtraction,
// so we add the negated value instead.
let neg_result_id = self.gen_id();
block.body.push(Instruction::unary(
spirv::Op::FNegate,
result_type_id,
neg_result_id,
value_id,
));
(spirv::Op::AtomicFAddEXT, neg_result_id)
}
_ => unimplemented!(),
};
Instruction::atomic_binary(
spirv_op,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
)
}
crate::AtomicFunction::And => {
let spirv_op = match scalar.kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
spirv::Op::AtomicAnd
}
_ => unimplemented!(),
};
Instruction::atomic_binary(
spirv_op,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
)
}
crate::AtomicFunction::InclusiveOr => {
let spirv_op = match scalar.kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
spirv::Op::AtomicOr
}
_ => unimplemented!(),
};
Instruction::atomic_binary(
spirv_op,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
)
}
crate::AtomicFunction::ExclusiveOr => {
let spirv_op = match scalar.kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
spirv::Op::AtomicXor
}
_ => unimplemented!(),
};
Instruction::atomic_binary(
spirv_op,
result_type_id,
id,
pointer_id,
scope_constant_id,
semantics_id,
value_id,
)
}
crate::AtomicFunction::Min => {
let spirv_op = match *value_inner {
crate::TypeInner::Scalar(crate::Scalar {
kind: crate::ScalarKind::Sint,
width: _,
}) => spirv::Op::AtomicSMin,
crate::TypeInner::Scalar(crate::Scalar {
kind: crate::ScalarKind::Uint,
width: _,
}) => spirv::Op::AtomicUMin,
let spirv_op = match scalar.kind {
crate::ScalarKind::Sint => spirv::Op::AtomicSMin,
crate::ScalarKind::Uint => spirv::Op::AtomicUMin,
_ => unimplemented!(),
};
Instruction::atomic_binary(
@@ -2799,15 +2852,9 @@ impl BlockContext<'_> {
)
}
crate::AtomicFunction::Max => {
let spirv_op = match *value_inner {
crate::TypeInner::Scalar(crate::Scalar {
kind: crate::ScalarKind::Sint,
width: _,
}) => spirv::Op::AtomicSMax,
crate::TypeInner::Scalar(crate::Scalar {
kind: crate::ScalarKind::Uint,
width: _,
}) => spirv::Op::AtomicUMax,
let spirv_op = match scalar.kind {
crate::ScalarKind::Sint => spirv::Op::AtomicSMax,
crate::ScalarKind::Uint => spirv::Op::AtomicUMax,
_ => unimplemented!(),
};
Instruction::atomic_binary(
@@ -2832,20 +2879,21 @@ impl BlockContext<'_> {
)
}
crate::AtomicFunction::Exchange { compare: Some(cmp) } => {
let scalar_type_id = match *value_inner {
crate::TypeInner::Scalar(scalar) => {
self.get_type_id(LookupType::Local(LocalType::Numeric(
NumericType::Scalar(scalar),
)))
}
_ => unimplemented!(),
};
let scalar_type_id = self.get_type_id(LookupType::Local(
LocalType::Numeric(NumericType::Scalar(scalar)),
));
let bool_type_id = self.get_type_id(LookupType::Local(
LocalType::Numeric(NumericType::Scalar(crate::Scalar::BOOL)),
));
let cas_result_id = self.gen_id();
let equality_result_id = self.gen_id();
let equality_operator = match scalar.kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
spirv::Op::IEqual
}
_ => unimplemented!(),
};
let mut cas_instr = Instruction::new(spirv::Op::AtomicCompareExchange);
cas_instr.set_type(scalar_type_id);
cas_instr.set_result(cas_result_id);
@@ -2857,7 +2905,7 @@ impl BlockContext<'_> {
cas_instr.add_operand(self.cached[cmp]);
block.body.push(cas_instr);
block.body.push(Instruction::binary(
spirv::Op::IEqual,
equality_operator,
bool_type_id,
equality_result_id,
cas_result_id,

View File

@@ -857,6 +857,16 @@ impl Writer {
crate::TypeInner::Atomic(crate::Scalar { width: 8, kind: _ }) => {
self.require_any("64 bit integer atomics", &[spirv::Capability::Int64Atomics])?;
}
crate::TypeInner::Atomic(crate::Scalar {
width: 4,
kind: crate::ScalarKind::Float,
}) => {
self.require_any(
"32 bit floating-point atomics",
&[spirv::Capability::AtomicFloat32AddEXT],
)?;
self.use_extension("SPV_EXT_shader_atomic_float_add");
}
_ => {}
}
Ok(())

View File

@@ -67,6 +67,7 @@ pub const SUPPORTED_CAPABILITIES: &[spirv::Capability] = &[
spirv::Capability::Int64,
spirv::Capability::Int64Atomics,
spirv::Capability::Float16,
spirv::Capability::AtomicFloat32AddEXT,
spirv::Capability::Float64,
spirv::Capability::Geometry,
spirv::Capability::MultiView,
@@ -78,6 +79,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
"SPV_KHR_storage_buffer_storage_class",
"SPV_KHR_vulkan_memory_model",
"SPV_KHR_multiview",
"SPV_EXT_shader_atomic_float_add",
];
pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
@@ -4339,7 +4341,8 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
| Op::AtomicUMax
| Op::AtomicAnd
| Op::AtomicOr
| Op::AtomicXor => self.parse_atomic_expr_with_value(
| Op::AtomicXor
| Op::AtomicFAddEXT => self.parse_atomic_expr_with_value(
inst,
&mut emitter,
ctx,
@@ -4348,7 +4351,7 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
body_idx,
match inst.op {
Op::AtomicExchange => crate::AtomicFunction::Exchange { compare: None },
Op::AtomicIAdd => crate::AtomicFunction::Add,
Op::AtomicIAdd | Op::AtomicFAddEXT => crate::AtomicFunction::Add,
Op::AtomicISub => crate::AtomicFunction::Subtract,
Op::AtomicSMin => crate::AtomicFunction::Min,
Op::AtomicUMin => crate::AtomicFunction::Min,
@@ -4356,7 +4359,8 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
Op::AtomicUMax => crate::AtomicFunction::Max,
Op::AtomicAnd => crate::AtomicFunction::And,
Op::AtomicOr => crate::AtomicFunction::InclusiveOr,
_ => crate::AtomicFunction::ExclusiveOr,
Op::AtomicXor => crate::AtomicFunction::ExclusiveOr,
_ => unreachable!(),
},
)?,

View File

@@ -1949,14 +1949,18 @@ pub enum Statement {
/// If [`SHADER_INT64_ATOMIC_MIN_MAX`] or [`SHADER_INT64_ATOMIC_ALL_OPS`] are
/// enabled, this may also be [`I64`] or [`U64`].
///
/// If [`SHADER_FLOAT32_ATOMIC`] is enabled, this may be [`F32`].
///
/// [`Pointer`]: TypeInner::Pointer
/// [`Atomic`]: TypeInner::Atomic
/// [`I32`]: Scalar::I32
/// [`U32`]: Scalar::U32
/// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX
/// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS
/// [`SHADER_FLOAT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLOAT32_ATOMIC
/// [`I64`]: Scalar::I64
/// [`U64`]: Scalar::U64
/// [`F32`]: Scalar::F32
pointer: Handle<Expression>,
/// Function to run on the atomic value.
@@ -1967,14 +1971,24 @@ pub enum Statement {
/// value here.
///
/// - The [`SHADER_INT64_ATOMIC_MIN_MAX`] capability allows
/// [`AtomicFunction::Min`] and [`AtomicFunction::Max`] here.
/// [`AtomicFunction::Min`] and [`AtomicFunction::Max`]
/// in the [`Storage`] address space here.
///
/// - If neither of those capabilities are present, then 64-bit scalar
/// atomics are not allowed.
///
/// If [`pointer`] refers to a 32-bit floating-point atomic value, then:
///
/// - The [`SHADER_FLOAT32_ATOMIC`] capability allows [`AtomicFunction::Add`],
/// [`AtomicFunction::Subtract`], and [`AtomicFunction::Exchange { compare: None }`]
/// in the [`Storage`] address space here.
///
/// [`AtomicFunction::Exchange { compare: None }`]: AtomicFunction::Exchange
/// [`pointer`]: Statement::Atomic::pointer
/// [`Storage`]: AddressSpace::Storage
/// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX
/// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS
/// [`SHADER_FLOAT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLOAT32_ATOMIC
fun: AtomicFunction,
/// Value to use in the function.

View File

@@ -41,10 +41,12 @@ pub enum CallError {
pub enum AtomicError {
#[error("Pointer {0:?} to atomic is invalid.")]
InvalidPointer(Handle<crate::Expression>),
#[error("Address space {0:?} does not support 64bit atomics.")]
#[error("Address space {0:?} is not supported.")]
InvalidAddressSpace(crate::AddressSpace),
#[error("Operand {0:?} has invalid type.")]
InvalidOperand(Handle<crate::Expression>),
#[error("Operator {0:?} is not supported.")]
InvalidOperator(crate::AtomicFunction),
#[error("Result expression {0:?} is not an `AtomicResult` expression")]
InvalidResultExpression(Handle<crate::Expression>),
#[error("Result expression {0:?} is marked as an `exchange`")]
@@ -401,49 +403,88 @@ impl super::Validator {
.into_other());
}
// Check for the special restrictions on 64-bit atomic operations.
//
// We don't need to consider other widths here: this function has already checked
// that `pointer`'s type is an `Atomic`, and `validate_type` has already checked
// that that `Atomic` type has a permitted scalar width.
if pointer_scalar.width == 8 {
// `Capabilities::SHADER_INT64_ATOMIC_ALL_OPS` enables all sorts of 64-bit
// atomic operations.
if self
.capabilities
.contains(super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS)
{
// okay
} else {
// `Capabilities::SHADER_INT64_ATOMIC_MIN_MAX` allows `Min` and
// `Max` on operations in `Storage`, without a return value.
if matches!(
*fun,
crate::AtomicFunction::Min | crate::AtomicFunction::Max
) && matches!(pointer_space, crate::AddressSpace::Storage { .. })
&& result.is_none()
match pointer_scalar {
// Check for the special restrictions on 64-bit atomic operations.
//
// We don't need to consider other widths here: this function has already checked
// that `pointer`'s type is an `Atomic`, and `validate_type` has already checked
// that `Atomic` type has a permitted scalar width.
crate::Scalar::I64 | crate::Scalar::U64 => {
// `Capabilities::SHADER_INT64_ATOMIC_ALL_OPS` enables all sorts of 64-bit
// atomic operations.
if self
.capabilities
.contains(super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS)
{
if !self
.capabilities
.contains(super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX)
// okay
} else {
// `Capabilities::SHADER_INT64_ATOMIC_MIN_MAX` allows `Min` and
// `Max` on operations in `Storage`, without a return value.
if matches!(
*fun,
crate::AtomicFunction::Min | crate::AtomicFunction::Max
) && matches!(pointer_space, crate::AddressSpace::Storage { .. })
&& result.is_none()
{
log::error!("Int64 min-max atomic operations are not supported");
if !self
.capabilities
.contains(super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX)
{
log::error!("Int64 min-max atomic operations are not supported");
return Err(AtomicError::MissingCapability(
super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX,
)
.with_span_handle(value, context.expressions)
.into_other());
}
} else {
// Otherwise, we require the full 64-bit atomic capability.
log::error!("Int64 atomic operations are not supported");
return Err(AtomicError::MissingCapability(
super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX,
super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS,
)
.with_span_handle(value, context.expressions)
.into_other());
}
} else {
// Otherwise, we require the full 64-bit atomic capability.
log::error!("Int64 atomic operations are not supported");
}
}
// Check for the special restrictions on 32-bit floating-point atomic operations.
crate::Scalar::F32 => {
// `Capabilities::SHADER_FLOAT32_ATOMIC` allows 32-bit floating-point
// atomic operations `Add`, `Subtract`, and `Exchange`
// in the `Storage` address space.
if !self
.capabilities
.contains(super::Capabilities::SHADER_FLOAT32_ATOMIC)
{
log::error!("Float32 atomic operations are not supported");
return Err(AtomicError::MissingCapability(
super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS,
super::Capabilities::SHADER_FLOAT32_ATOMIC,
)
.with_span_handle(value, context.expressions)
.into_other());
}
if !matches!(
*fun,
crate::AtomicFunction::Add
| crate::AtomicFunction::Subtract
| crate::AtomicFunction::Exchange { compare: None }
) {
log::error!("Float32 atomic operation {:?} is not supported", fun);
return Err(AtomicError::InvalidOperator(*fun)
.with_span_handle(value, context.expressions)
.into_other());
}
if !matches!(pointer_space, crate::AddressSpace::Storage { .. }) {
log::error!(
"Float32 atomic operations are only supported in the Storage address space"
);
return Err(AtomicError::InvalidAddressSpace(pointer_space)
.with_span_handle(value, context.expressions)
.into_other());
}
}
_ => {}
}
// The result expression must be appropriate to the operation.

View File

@@ -81,56 +81,56 @@ bitflags::bitflags! {
/// Support for [`AddressSpace::PushConstant`][1].
///
/// [1]: crate::AddressSpace::PushConstant
const PUSH_CONSTANT = 0x1;
const PUSH_CONSTANT = 1 << 0;
/// Float values with width = 8.
const FLOAT64 = 0x2;
const FLOAT64 = 1 << 1;
/// Support for [`BuiltIn::PrimitiveIndex`][1].
///
/// [1]: crate::BuiltIn::PrimitiveIndex
const PRIMITIVE_INDEX = 0x4;
const PRIMITIVE_INDEX = 1 << 2;
/// Support for non-uniform indexing of sampled textures and storage buffer arrays.
const SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING = 0x8;
const SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING = 1 << 3;
/// Support for non-uniform indexing of uniform buffers and storage texture arrays.
const UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING = 0x10;
const UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING = 1 << 4;
/// Support for non-uniform indexing of samplers.
const SAMPLER_NON_UNIFORM_INDEXING = 0x20;
const SAMPLER_NON_UNIFORM_INDEXING = 1 << 5;
/// Support for [`BuiltIn::ClipDistance`].
///
/// [`BuiltIn::ClipDistance`]: crate::BuiltIn::ClipDistance
const CLIP_DISTANCE = 0x40;
const CLIP_DISTANCE = 1 << 6;
/// Support for [`BuiltIn::CullDistance`].
///
/// [`BuiltIn::CullDistance`]: crate::BuiltIn::CullDistance
const CULL_DISTANCE = 0x80;
const CULL_DISTANCE = 1 << 7;
/// Support for 16-bit normalized storage texture formats.
const STORAGE_TEXTURE_16BIT_NORM_FORMATS = 0x100;
const STORAGE_TEXTURE_16BIT_NORM_FORMATS = 1 << 8;
/// Support for [`BuiltIn::ViewIndex`].
///
/// [`BuiltIn::ViewIndex`]: crate::BuiltIn::ViewIndex
const MULTIVIEW = 0x200;
const MULTIVIEW = 1 << 9;
/// Support for `early_depth_test`.
const EARLY_DEPTH_TEST = 0x400;
const EARLY_DEPTH_TEST = 1 << 10;
/// Support for [`BuiltIn::SampleIndex`] and [`Sampling::Sample`].
///
/// [`BuiltIn::SampleIndex`]: crate::BuiltIn::SampleIndex
/// [`Sampling::Sample`]: crate::Sampling::Sample
const MULTISAMPLED_SHADING = 0x800;
const MULTISAMPLED_SHADING = 1 << 11;
/// Support for ray queries and acceleration structures.
const RAY_QUERY = 0x1000;
const RAY_QUERY = 1 << 12;
/// Support for generating two sources for blending from fragment shaders.
const DUAL_SOURCE_BLENDING = 0x2000;
const DUAL_SOURCE_BLENDING = 1 << 13;
/// Support for arrayed cube textures.
const CUBE_ARRAY_TEXTURES = 0x4000;
const CUBE_ARRAY_TEXTURES = 1 << 14;
/// Support for 64-bit signed and unsigned integers.
const SHADER_INT64 = 0x8000;
const SHADER_INT64 = 1 << 15;
/// Support for subgroup operations.
/// Implies support for subgroup operations in both fragment and compute stages,
/// but not necessarily in the vertex stage, which requires [`Capabilities::SUBGROUP_VERTEX_STAGE`].
const SUBGROUP = 0x10000;
const SUBGROUP = 1 << 16;
/// Support for subgroup barriers.
const SUBGROUP_BARRIER = 0x20000;
const SUBGROUP_BARRIER = 1 << 17;
/// Support for subgroup operations in the vertex stage.
const SUBGROUP_VERTEX_STAGE = 0x40000;
const SUBGROUP_VERTEX_STAGE = 1 << 18;
/// Support for [`AtomicFunction::Min`] and [`AtomicFunction::Max`] on
/// 64-bit integers in the [`Storage`] address space, when the return
/// value is not used.
@@ -140,9 +140,18 @@ bitflags::bitflags! {
/// [`AtomicFunction::Min`]: crate::AtomicFunction::Min
/// [`AtomicFunction::Max`]: crate::AtomicFunction::Max
/// [`Storage`]: crate::AddressSpace::Storage
const SHADER_INT64_ATOMIC_MIN_MAX = 0x80000;
const SHADER_INT64_ATOMIC_MIN_MAX = 1 << 19;
/// Support for all atomic operations on 64-bit integers.
const SHADER_INT64_ATOMIC_ALL_OPS = 0x100000;
const SHADER_INT64_ATOMIC_ALL_OPS = 1 << 20;
/// Support for [`AtomicFunction::Add`], [`AtomicFunction::Subtract`],
/// and [`AtomicFunction::Exchange { compare: None }`] on 32-bit floating-point numbers
/// in the [`Storage`] address space.
///
/// [`AtomicFunction::Add`]: crate::AtomicFunction::Add
/// [`AtomicFunction::Subtract`]: crate::AtomicFunction::Subtract
/// [`AtomicFunction::Exchange { compare: None }`]: crate::AtomicFunction::Exchange
/// [`Storage`]: crate::AddressSpace::Storage
const SHADER_FLOAT32_ATOMIC = 1 << 21;
}
}

View File

@@ -370,35 +370,41 @@ impl super::Validator {
Alignment::from(rows) * Alignment::from_width(scalar.width),
)
}
Ti::Atomic(crate::Scalar { kind, width }) => {
match kind {
crate::ScalarKind::Bool
| crate::ScalarKind::Float
| crate::ScalarKind::AbstractInt
| crate::ScalarKind::AbstractFloat => {
return Err(TypeError::InvalidAtomicWidth(kind, width))
}
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
if width == 8 {
if !self.capabilities.intersects(
Ti::Atomic(scalar) => {
match scalar {
crate::Scalar {
kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint,
width: _,
} => {
if scalar.width == 8
&& !self.capabilities.intersects(
Capabilities::SHADER_INT64_ATOMIC_ALL_OPS
| Capabilities::SHADER_INT64_ATOMIC_MIN_MAX,
) {
return Err(TypeError::MissingCapability(
Capabilities::SHADER_INT64_ATOMIC_ALL_OPS,
));
}
} else if width != 4 {
return Err(TypeError::InvalidAtomicWidth(kind, width));
)
{
return Err(TypeError::MissingCapability(
Capabilities::SHADER_INT64_ATOMIC_ALL_OPS,
));
}
}
crate::Scalar::F32 => {
if !self
.capabilities
.contains(Capabilities::SHADER_FLOAT32_ATOMIC)
{
return Err(TypeError::MissingCapability(
Capabilities::SHADER_FLOAT32_ATOMIC,
));
}
}
_ => return Err(TypeError::InvalidAtomicWidth(scalar.kind, scalar.width)),
};
TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::HOST_SHAREABLE
| TypeFlags::CREATION_RESOLVED,
Alignment::from_width(width),
Alignment::from_width(scalar.width),
)
}
Ti::Pointer { base, space } => {

View File

@@ -0,0 +1,15 @@
(
god_mode: true,
spv: (
version: (1, 1),
capabilities: [ AtomicFloat32AddEXT ],
),
msl: (
lang_version: (3, 0),
per_entry_point_map: {},
inline_samplers: [],
spirv_cross_compatibility: false,
fake_missing_bindings: true,
zero_initialize_workgroup_memory: false,
),
)

View File

@@ -0,0 +1,47 @@
struct Struct {
atomic_scalar: atomic<f32>,
atomic_arr: array<atomic<f32>, 2>,
}
@group(0) @binding(0)
var<storage, read_write> storage_atomic_scalar: atomic<f32>;
@group(0) @binding(1)
var<storage, read_write> storage_atomic_arr: array<atomic<f32>, 2>;
@group(0) @binding(2)
var<storage, read_write> storage_struct: Struct;
@compute
@workgroup_size(2)
fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
atomicStore(&storage_atomic_scalar, 1.5);
atomicStore(&storage_atomic_arr[1], 1.5);
atomicStore(&storage_struct.atomic_scalar, 1.5);
atomicStore(&storage_struct.atomic_arr[1], 1.5);
workgroupBarrier();
let l0 = atomicLoad(&storage_atomic_scalar);
let l1 = atomicLoad(&storage_atomic_arr[1]);
let l2 = atomicLoad(&storage_struct.atomic_scalar);
let l3 = atomicLoad(&storage_struct.atomic_arr[1]);
workgroupBarrier();
atomicAdd(&storage_atomic_scalar, 1.5);
atomicAdd(&storage_atomic_arr[1], 1.5);
atomicAdd(&storage_struct.atomic_scalar, 1.5);
atomicAdd(&storage_struct.atomic_arr[1], 1.5);
workgroupBarrier();
atomicExchange(&storage_atomic_scalar, 1.5);
atomicExchange(&storage_atomic_arr[1], 1.5);
atomicExchange(&storage_struct.atomic_scalar, 1.5);
atomicExchange(&storage_struct.atomic_arr[1], 1.5);
// // TODO: https://github.com/gpuweb/gpuweb/issues/2021
// atomicCompareExchangeWeak(&storage_atomic_scalar, 1.5);
// atomicCompareExchangeWeak(&storage_atomic_arr[1], 1.5);
// atomicCompareExchangeWeak(&storage_struct.atomic_scalar, 1.5);
// atomicCompareExchangeWeak(&storage_struct.atomic_arr[1], 1.5);
}

View File

@@ -0,0 +1,43 @@
// language: metal3.0
#include <metal_stdlib>
#include <simd/simd.h>
using metal::uint;
struct type_1 {
metal::atomic_float inner[2];
};
struct Struct {
metal::atomic_float atomic_scalar;
type_1 atomic_arr;
};
struct cs_mainInput {
};
kernel void cs_main(
metal::uint3 id [[thread_position_in_threadgroup]]
, device metal::atomic_float& storage_atomic_scalar [[user(fake0)]]
, device type_1& storage_atomic_arr [[user(fake0)]]
, device Struct& storage_struct [[user(fake0)]]
) {
metal::atomic_store_explicit(&storage_atomic_scalar, 1.5, metal::memory_order_relaxed);
metal::atomic_store_explicit(&storage_atomic_arr.inner[1], 1.5, metal::memory_order_relaxed);
metal::atomic_store_explicit(&storage_struct.atomic_scalar, 1.5, metal::memory_order_relaxed);
metal::atomic_store_explicit(&storage_struct.atomic_arr.inner[1], 1.5, metal::memory_order_relaxed);
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
float l0_ = metal::atomic_load_explicit(&storage_atomic_scalar, metal::memory_order_relaxed);
float l1_ = metal::atomic_load_explicit(&storage_atomic_arr.inner[1], metal::memory_order_relaxed);
float l2_ = metal::atomic_load_explicit(&storage_struct.atomic_scalar, metal::memory_order_relaxed);
float l3_ = metal::atomic_load_explicit(&storage_struct.atomic_arr.inner[1], metal::memory_order_relaxed);
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
float _e27 = metal::atomic_fetch_add_explicit(&storage_atomic_scalar, 1.5, metal::memory_order_relaxed);
float _e31 = metal::atomic_fetch_add_explicit(&storage_atomic_arr.inner[1], 1.5, metal::memory_order_relaxed);
float _e35 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_scalar, 1.5, metal::memory_order_relaxed);
float _e40 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_arr.inner[1], 1.5, metal::memory_order_relaxed);
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
float _e43 = metal::atomic_exchange_explicit(&storage_atomic_scalar, 1.5, metal::memory_order_relaxed);
float _e47 = metal::atomic_exchange_explicit(&storage_atomic_arr.inner[1], 1.5, metal::memory_order_relaxed);
float _e51 = metal::atomic_exchange_explicit(&storage_struct.atomic_scalar, 1.5, metal::memory_order_relaxed);
float _e56 = metal::atomic_exchange_explicit(&storage_struct.atomic_arr.inner[1], 1.5, metal::memory_order_relaxed);
return;
}

View File

@@ -0,0 +1,98 @@
; SPIR-V
; Version: 1.1
; Generator: rspirv
; Bound: 62
OpCapability Shader
OpCapability AtomicFloat32AddEXT
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpExtension "SPV_EXT_shader_atomic_float_add"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %22 "cs_main" %19
OpExecutionMode %22 LocalSize 2 1 1
OpDecorate %4 ArrayStride 4
OpMemberDecorate %7 0 Offset 0
OpMemberDecorate %7 1 Offset 4
OpDecorate %9 DescriptorSet 0
OpDecorate %9 Binding 0
OpDecorate %10 Block
OpMemberDecorate %10 0 Offset 0
OpDecorate %12 DescriptorSet 0
OpDecorate %12 Binding 1
OpDecorate %13 Block
OpMemberDecorate %13 0 Offset 0
OpDecorate %15 DescriptorSet 0
OpDecorate %15 Binding 2
OpDecorate %16 Block
OpMemberDecorate %16 0 Offset 0
OpDecorate %19 BuiltIn LocalInvocationId
%2 = OpTypeVoid
%3 = OpTypeFloat 32
%6 = OpTypeInt 32 0
%5 = OpConstant %6 2
%4 = OpTypeArray %3 %5
%7 = OpTypeStruct %3 %4
%8 = OpTypeVector %6 3
%10 = OpTypeStruct %3
%11 = OpTypePointer StorageBuffer %10
%9 = OpVariable %11 StorageBuffer
%13 = OpTypeStruct %4
%14 = OpTypePointer StorageBuffer %13
%12 = OpVariable %14 StorageBuffer
%16 = OpTypeStruct %7
%17 = OpTypePointer StorageBuffer %16
%15 = OpVariable %17 StorageBuffer
%20 = OpTypePointer Input %8
%19 = OpVariable %20 Input
%23 = OpTypeFunction %2
%24 = OpTypePointer StorageBuffer %3
%25 = OpConstant %6 0
%27 = OpTypePointer StorageBuffer %4
%29 = OpTypePointer StorageBuffer %7
%31 = OpConstant %3 1.5
%34 = OpTypeInt 32 1
%33 = OpConstant %34 1
%35 = OpConstant %6 64
%36 = OpConstant %6 1
%40 = OpConstant %6 264
%22 = OpFunction %2 None %23
%18 = OpLabel
%21 = OpLoad %8 %19
%26 = OpAccessChain %24 %9 %25
%28 = OpAccessChain %27 %12 %25
%30 = OpAccessChain %29 %15 %25
OpBranch %32
%32 = OpLabel
OpAtomicStore %26 %33 %35 %31
%37 = OpAccessChain %24 %28 %36
OpAtomicStore %37 %33 %35 %31
%38 = OpAccessChain %24 %30 %25
OpAtomicStore %38 %33 %35 %31
%39 = OpAccessChain %24 %30 %36 %36
OpAtomicStore %39 %33 %35 %31
OpControlBarrier %5 %5 %40
%41 = OpAtomicLoad %3 %26 %33 %35
%42 = OpAccessChain %24 %28 %36
%43 = OpAtomicLoad %3 %42 %33 %35
%44 = OpAccessChain %24 %30 %25
%45 = OpAtomicLoad %3 %44 %33 %35
%46 = OpAccessChain %24 %30 %36 %36
%47 = OpAtomicLoad %3 %46 %33 %35
OpControlBarrier %5 %5 %40
%48 = OpAtomicFAddEXT %3 %26 %33 %35 %31
%50 = OpAccessChain %24 %28 %36
%49 = OpAtomicFAddEXT %3 %50 %33 %35 %31
%52 = OpAccessChain %24 %30 %25
%51 = OpAtomicFAddEXT %3 %52 %33 %35 %31
%54 = OpAccessChain %24 %30 %36 %36
%53 = OpAtomicFAddEXT %3 %54 %33 %35 %31
OpControlBarrier %5 %5 %40
%55 = OpAtomicExchange %3 %26 %33 %35 %31
%57 = OpAccessChain %24 %28 %36
%56 = OpAtomicExchange %3 %57 %33 %35 %31
%59 = OpAccessChain %24 %30 %25
%58 = OpAtomicExchange %3 %59 %33 %35 %31
%61 = OpAccessChain %24 %30 %36 %36
%60 = OpAtomicExchange %3 %61 %33 %35 %31
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,35 @@
struct Struct {
atomic_scalar: atomic<f32>,
atomic_arr: array<atomic<f32>, 2>,
}
@group(0) @binding(0)
var<storage, read_write> storage_atomic_scalar: atomic<f32>;
@group(0) @binding(1)
var<storage, read_write> storage_atomic_arr: array<atomic<f32>, 2>;
@group(0) @binding(2)
var<storage, read_write> storage_struct: Struct;
@compute @workgroup_size(2, 1, 1)
fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
atomicStore((&storage_atomic_scalar), 1.5f);
atomicStore((&storage_atomic_arr[1]), 1.5f);
atomicStore((&storage_struct.atomic_scalar), 1.5f);
atomicStore((&storage_struct.atomic_arr[1]), 1.5f);
workgroupBarrier();
let l0_ = atomicLoad((&storage_atomic_scalar));
let l1_ = atomicLoad((&storage_atomic_arr[1]));
let l2_ = atomicLoad((&storage_struct.atomic_scalar));
let l3_ = atomicLoad((&storage_struct.atomic_arr[1]));
workgroupBarrier();
let _e27 = atomicAdd((&storage_atomic_scalar), 1.5f);
let _e31 = atomicAdd((&storage_atomic_arr[1]), 1.5f);
let _e35 = atomicAdd((&storage_struct.atomic_scalar), 1.5f);
let _e40 = atomicAdd((&storage_struct.atomic_arr[1]), 1.5f);
workgroupBarrier();
let _e43 = atomicExchange((&storage_atomic_scalar), 1.5f);
let _e47 = atomicExchange((&storage_atomic_arr[1]), 1.5f);
let _e51 = atomicExchange((&storage_struct.atomic_scalar), 1.5f);
let _e56 = atomicExchange((&storage_struct.atomic_arr[1]), 1.5f);
return;
}

View File

@@ -788,6 +788,10 @@ fn convert_wgsl() {
"atomicOps-int64-min-max",
Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL,
),
(
"atomicOps-float32",
Targets::SPIRV | Targets::METAL | Targets::WGSL,
),
(
"atomicCompareExchange-int64",
Targets::SPIRV | Targets::WGSL,

View File

@@ -151,6 +151,46 @@ static INT64_ATOMIC_ALL_OPS: GpuTestConfiguration = GpuTestConfiguration::new()
)
});
/// Builds the shader test cases for 32-bit floating-point atomics
/// (`SHADER_FLOAT32_ATOMIC`).
///
/// Each case stores an initial value into an `atomic<f32>` storage output,
/// applies the operation under test twice, and asserts the final buffer
/// contents:
///
/// * `atomicAdd`: 0.0 + (-0.50) + 1.75 == 1.25
/// * `atomicSub`: 0.0 - (-2.5) - 3.0 == -0.5
fn create_float32_atomic_test() -> Vec<ShaderTest> {
    let mut tests = Vec::new();

    let test = ShaderTest::new(
        "atomicAdd".into(),
        "value: f32".into(),
        "atomicStore(&output, 0.0); atomicAdd(&output, -0.50); atomicAdd(&output, 1.75);".into(),
        &[0_f32],
        &[1.25_f32],
    )
    .output_type("atomic<f32>".into());

    tests.push(test);

    // This case exercises `atomicSub`, so label it accordingly
    // (it was previously mislabeled "atomicAdd" — a copy-paste leftover).
    let test = ShaderTest::new(
        "atomicSub".into(),
        "value: f32".into(),
        "atomicStore(&output, 0.0); atomicSub(&output, -2.5); atomicSub(&output, 3.0);".into(),
        &[0_f32],
        &[-0.5_f32],
    )
    .output_type("atomic<f32>".into());

    tests.push(test);

    tests
}
/// Runs the 32-bit float atomic shader cases from
/// [`create_float32_atomic_test`] against a storage buffer.
///
/// Gated on the `SHADER_FLOAT32_ATOMIC` feature plus compute-shader
/// downlevel support, with downlevel default limits.
#[gpu_test]
static FLOAT32_ATOMIC: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .features(wgt::Features::SHADER_FLOAT32_ATOMIC)
            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
            .limits(Limits::downlevel_defaults()),
    )
    .run_async(|ctx| {
        shader_input_output_test(ctx, InputStorageType::Storage, create_float32_atomic_test())
    });
// See https://github.com/gfx-rs/wgpu/issues/5276
/*
fn create_int64_polyfill_test() -> Vec<ShaderTest> {

View File

@@ -425,6 +425,10 @@ pub fn create_validator(
Caps::SHADER_INT64_ATOMIC_ALL_OPS,
features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS),
);
caps.set(
Caps::SHADER_FLOAT32_ATOMIC,
features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC),
);
caps.set(
Caps::MULTISAMPLED_SHADING,
downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING),

View File

@@ -838,6 +838,10 @@ impl super::PrivateCapabilities {
&& ((device.supports_family(MTLGPUFamily::Apple8)
&& device.supports_family(MTLGPUFamily::Mac2))
|| device.supports_family(MTLGPUFamily::Apple9)),
// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6
float_atomics: family_check
&& (device.supports_family(MTLGPUFamily::Apple7)
|| device.supports_family(MTLGPUFamily::Mac2)),
supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac),
}
}
@@ -913,6 +917,10 @@ impl super::PrivateCapabilities {
F::SHADER_INT64_ATOMIC_MIN_MAX,
self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4,
);
features.set(
F::SHADER_FLOAT32_ATOMIC,
self.float_atomics && self.msl_version >= MTLLanguageVersion::V3_0,
);
features.set(
F::ADDRESS_MODE_CLAMP_TO_BORDER,

View File

@@ -290,6 +290,7 @@ struct PrivateCapabilities {
supports_simd_scoped_operations: bool,
int64: bool,
int64_atomics: bool,
float_atomics: bool,
supports_shared_event: bool,
}

View File

@@ -109,6 +109,9 @@ pub struct PhysicalDeviceFeatures {
/// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2.
shader_atomic_int64: Option<vk::PhysicalDeviceShaderAtomicInt64Features<'static>>,
/// Features provided by `VK_EXT_shader_atomic_float`.
shader_atomic_float: Option<vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT<'static>>,
/// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3.
subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures<'static>>,
}
@@ -157,6 +160,9 @@ impl PhysicalDeviceFeatures {
if let Some(ref mut feature) = self.shader_atomic_int64 {
info = info.push_next(feature);
}
if let Some(ref mut feature) = self.shader_atomic_float {
info = info.push_next(feature);
}
if let Some(ref mut feature) = self.subgroup_size_control {
info = info.push_next(feature);
}
@@ -438,6 +444,16 @@ impl PhysicalDeviceFeatures {
} else {
None
},
shader_atomic_float: if enabled_extensions.contains(&ext::shader_atomic_float::NAME) {
let needed = requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC);
Some(
vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT::default()
.shader_buffer_float32_atomics(needed)
.shader_buffer_float32_atomic_add(needed),
)
} else {
None
},
subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3
|| enabled_extensions.contains(&ext::subgroup_size_control::NAME)
{
@@ -582,6 +598,14 @@ impl PhysicalDeviceFeatures {
);
}
if let Some(ref shader_atomic_float) = self.shader_atomic_float {
features.set(
F::SHADER_FLOAT32_ATOMIC,
shader_atomic_float.shader_buffer_float32_atomics != 0
&& shader_atomic_float.shader_buffer_float32_atomic_add != 0,
);
}
//if caps.supports_extension(khr::sampler_mirror_clamp_to_edge::NAME) {
//if caps.supports_extension(ext::sampler_filter_minmax::NAME) {
features.set(
@@ -994,6 +1018,11 @@ impl PhysicalDeviceProperties {
extensions.push(khr::shader_atomic_int64::NAME);
}
// Require `VK_EXT_shader_atomic_float` if the associated feature was requested
if requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) {
extensions.push(ext::shader_atomic_float::NAME);
}
// Require VK_GOOGLE_display_timing if the associated feature was requested
if requested_features.contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING) {
extensions.push(google::display_timing::NAME);
@@ -1289,6 +1318,12 @@ impl super::InstanceShared {
features2 = features2.push_next(next);
}
if capabilities.supports_extension(ext::shader_atomic_float::NAME) {
let next = features
.shader_atomic_float
.insert(vk::PhysicalDeviceShaderAtomicFloatFeaturesEXT::default());
features2 = features2.push_next(next);
}
if capabilities.supports_extension(ext::image_robustness::NAME) {
let next = features
.image_robustness
@@ -1784,6 +1819,10 @@ impl super::Adapter {
capabilities.push(spv::Capability::Int64Atomics);
}
if features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) {
capabilities.push(spv::Capability::AtomicFloat32AddEXT);
}
let mut flags = spv::WriterFlags::empty();
flags.set(
spv::WriterFlags::DEBUG,

View File

@@ -405,6 +405,17 @@ bitflags::bitflags! {
// Native Features:
//
/// Allows shaders to use f32 atomic load, store, add, sub, and exchange.
///
/// Supported platforms:
/// - Metal (with MSL 3.0+ and Apple7+/Mac2)
/// - Vulkan (with [VK_EXT_shader_atomic_float])
///
/// This is a native only feature.
///
/// [VK_EXT_shader_atomic_float]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_shader_atomic_float.html
const SHADER_FLOAT32_ATOMIC = 1 << 19;
// The features starting with a ? are features that might become part of the spec or
// at the very least we can implement as native features; since they should cover all
// possible formats and capabilities across backends.
@@ -964,7 +975,7 @@ impl Features {
/// Mask of all features which are part of the upstream WebGPU standard.
#[must_use]
pub const fn all_webgpu_mask() -> Self {
Self::from_bits_truncate(0xFFFFF)
Self::from_bits_truncate(0x7FFFF)
}
/// Mask of all features that are only available when targeting native (not web).