feat: implement F16 support in shaders (#5701)

Co-authored-by: FL33TW00D <fleetwoodpersonal@gmail.com>
Co-authored-by: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Co-authored-by: ErichDonGubler <erichdongubler@gmail.com>
This commit is contained in:
Christopher Fleetwood
2025-03-19 16:26:50 +00:00
committed by GitHub
parent b912232188
commit c6286791fe
62 changed files with 3743 additions and 998 deletions

14
Cargo.lock generated
View File

@@ -1785,8 +1785,12 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1"
dependencies = [
"arbitrary",
"bytemuck",
"cfg-if",
"crunchy",
"num-traits",
"serde",
]
[[package]]
@@ -2193,6 +2197,12 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libredox"
version = "0.1.3"
@@ -2393,12 +2403,14 @@ dependencies = [
"codespan-reporting",
"diff",
"env_logger",
"half",
"hashbrown",
"hexf-parse",
"hlsl-snapshots",
"indexmap",
"itertools 0.13.0",
"log",
"num-traits",
"petgraph 0.7.1",
"pp-rs",
"ron",
@@ -2574,6 +2586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
"libm",
]
[[package]]
@@ -4843,6 +4856,7 @@ dependencies = [
"env_logger",
"futures-lite",
"glam",
"half",
"image",
"itertools 0.13.0",
"js-sys",

View File

@@ -108,6 +108,7 @@ fern = "0.7"
flume = "0.11"
futures-lite = "2"
glam = "0.29"
half = "2.5" # We require 2.5 to have `Arbitrary` support.
hashbrown = { version = "0.14.5", default-features = false, features = [
"ahash",
"inline-more",

View File

@@ -41,10 +41,17 @@ msl-out = []
## If you want to enable MSL output regardless of the target platform, use `naga/msl-out`.
msl-out-if-target-apple = []
serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"]
serialize = [
"dep:serde",
"bitflags/serde",
"half/serde",
"hashbrown/serde",
"indexmap/serde",
]
deserialize = [
"dep:serde",
"bitflags/serde",
"half/serde",
"hashbrown/serde",
"indexmap/serde",
]
@@ -84,9 +91,12 @@ termcolor = { version = "1.4.1" }
# https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
codespan-reporting = { version = "0.11.0" }
hashbrown.workspace = true
half = { workspace = true, features = ["arbitrary", "num-traits"] }
rustc-hash.workspace = true
indexmap.workspace = true
log = "0.4"
# `half` requires 0.2.16 for `FromBytes` and `ToBytes`.
num-traits = "0.2.16"
strum = { workspace = true, optional = true }
spirv = { version = "0.3", optional = true }
thiserror.workspace = true

View File

@@ -2703,6 +2703,9 @@ impl<'a, W: Write> Writer<'a, W> {
// decimal part even if it's zero, which is needed for a valid GLSL float constant
crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?,
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
crate::Literal::F16(_) => {
return Err(Error::Custom("GLSL has no 16-bit float type".into()));
}
// Unsigned integers need a `u` at the end
//
// While `core` doesn't necessarily need it, it's allowed and since `es` needs it we

View File

@@ -2628,6 +2628,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
// decimal part even if it's zero
crate::Literal::F64(value) => write!(self.out, "{value:?}L")?,
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
crate::Literal::F16(value) => write!(self.out, "{value:?}h")?,
crate::Literal::U32(value) => write!(self.out, "{value}u")?,
// HLSL has no suffix for explicit i32 literals, but not using any suffix
// makes the type ambiguous which prevents overload resolution from

View File

@@ -8,6 +8,9 @@ use core::{
fmt::{Display, Error as FmtError, Formatter, Write},
iter,
};
use num_traits::real::Real as _;
use half::f16;
use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo};
use crate::{
@@ -182,9 +185,11 @@ impl Display for TypeContext<'_> {
write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
}
crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
crate::TypeInner::Matrix { columns, rows, .. } => {
put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
}
crate::TypeInner::Matrix {
columns,
rows,
scalar,
} => put_numeric_type(out, scalar, &[rows, columns]),
crate::TypeInner::Pointer { base, space } => {
let sub = Self {
handle: base,
@@ -425,8 +430,12 @@ impl crate::Scalar {
match self {
Self {
kind: Sk::Float,
width: _,
width: 4,
} => "float",
Self {
kind: Sk::Float,
width: 2,
} => "half",
Self {
kind: Sk::Sint,
width: 4,
@@ -483,7 +492,7 @@ fn should_pack_struct_member(
match *ty_inner {
crate::TypeInner::Vector {
size: crate::VectorSize::Tri,
scalar: scalar @ crate::Scalar { width: 4, .. },
scalar: scalar @ crate::Scalar { width: 4 | 2, .. },
} if is_tight => Some(scalar),
_ => None,
}
@@ -1459,6 +1468,21 @@ impl<W: Write> Writer<W> {
crate::Literal::F64(_) => {
return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
}
crate::Literal::F16(value) => {
if value.is_infinite() {
let sign = if value.is_sign_negative() { "-" } else { "" };
write!(self.out, "{sign}INFINITY")?;
} else if value.is_nan() {
write!(self.out, "NAN")?;
} else {
let suffix = if value.fract() == f16::from_f32(0.0) {
".0h"
} else {
"h"
};
write!(self.out, "{value}{suffix}")?;
}
}
crate::Literal::F32(value) => {
if value.is_infinite() {
let sign = if value.is_sign_negative() { "-" } else { "" };

View File

@@ -4,6 +4,7 @@ Implementations for `BlockContext` methods.
use alloc::vec::Vec;
use arrayvec::ArrayVec;
use spirv::Word;
use super::{
@@ -1612,159 +1613,7 @@ impl BlockContext<'_> {
expr,
kind,
convert,
} => {
use crate::ScalarKind as Sk;
let expr_id = self.cached[expr];
let (src_scalar, src_size, is_matrix) =
match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) {
crate::TypeInner::Scalar(scalar) => (scalar, None, false),
crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false),
crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true),
ref other => {
log::error!("As source {:?}", other);
return Err(Error::Validation("Unexpected Expression::As source"));
}
};
enum Cast {
Identity,
Unary(spirv::Op),
Binary(spirv::Op, Word),
Ternary(spirv::Op, Word, Word),
}
let cast = if is_matrix {
// we only support identity casts for matrices
Cast::Unary(spirv::Op::CopyObject)
} else {
match (src_scalar.kind, kind, convert) {
// Filter out identity casts. Some Adreno drivers are
// confused by no-op OpBitCast instructions.
(src_kind, kind, convert)
if src_kind == kind
&& convert.filter(|&width| width != src_scalar.width).is_none() =>
{
Cast::Identity
}
(Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
(_, _, None) => Cast::Unary(spirv::Op::Bitcast),
// casting to a bool - generate `OpXxxNotEqual`
(_, Sk::Bool, Some(_)) => {
let op = match src_scalar.kind {
Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
Sk::Float => spirv::Op::FUnordNotEqual,
Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
};
let zero_scalar_id =
self.writer.get_constant_scalar_with(0, src_scalar)?;
let zero_id = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: src_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
self.writer.get_constant_composite(ty, &self.temp_list)
}
None => zero_scalar_id,
};
Cast::Binary(op, zero_id)
}
// casting from a bool - generate `OpSelect`
(Sk::Bool, _, Some(dst_width)) => {
let dst_scalar = crate::Scalar {
kind,
width: dst_width,
};
let zero_scalar_id =
self.writer.get_constant_scalar_with(0, dst_scalar)?;
let one_scalar_id =
self.writer.get_constant_scalar_with(1, dst_scalar)?;
let (accept_id, reject_id) = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: dst_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
let vec0_id =
self.writer.get_constant_composite(ty, &self.temp_list);
self.temp_list.fill(one_scalar_id);
let vec1_id =
self.writer.get_constant_composite(ty, &self.temp_list);
(vec1_id, vec0_id)
}
None => (one_scalar_id, zero_scalar_id),
};
Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
}
(Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
(Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
(Sk::Float, Sk::Float, Some(dst_width))
if src_scalar.width != dst_width =>
{
Cast::Unary(spirv::Op::FConvert)
}
(Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
(Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
// We assume it's either an identity cast, or int-uint.
_ => Cast::Unary(spirv::Op::Bitcast),
}
};
let id = self.gen_id();
let instruction = match cast {
Cast::Identity => None,
Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)),
Cast::Binary(op, operand) => Some(Instruction::binary(
op,
result_type_id,
id,
expr_id,
operand,
)),
Cast::Ternary(op, op1, op2) => Some(Instruction::ternary(
op,
result_type_id,
id,
expr_id,
op1,
op2,
)),
};
if let Some(instruction) = instruction {
block.body.push(instruction);
id
} else {
expr_id
}
}
} => self.write_as_expression(expr, convert, kind, block, result_type_id)?,
crate::Expression::ImageLoad {
image,
coordinate,
@@ -1925,6 +1774,237 @@ impl BlockContext<'_> {
Ok(())
}
/// Helper which focuses on generating the `As` expressions and the various conversions
/// that need to happen because of that.
///
/// `expr` is the (already cached) source expression of the cast, `kind` is the
/// target scalar kind, and `convert` is the optional target byte width — `None`
/// means a bitcast rather than a value conversion. Any SPIR-V instructions
/// needed are appended to `block`, and the id of the resulting value is
/// returned (which is the source id itself for identity casts).
fn write_as_expression(
&mut self,
expr: Handle<crate::Expression>,
convert: Option<u8>,
kind: crate::ScalarKind,
block: &mut Block,
result_type_id: u32,
) -> Result<u32, Error> {
use crate::ScalarKind as Sk;
let expr_id = self.cached[expr];
let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types);
// Matrix casts need special treatment in SPIR-V, as the cast functions
// can take vectors or scalars, but not matrices. In order to cast a matrix
// we need to cast each column of the matrix individually and construct a new
// matrix from the converted columns.
if let crate::TypeInner::Matrix {
columns,
rows,
scalar,
} = *ty
{
let Some(convert) = convert else {
// No conversion needs to be done, passes through.
return Ok(expr_id);
};
if convert == scalar.width {
// No conversion needs to be done, passes through.
return Ok(expr_id);
}
if kind != Sk::Float {
// Only float conversions are supported for matrices.
return Err(Error::Validation("Matrices must be floats"));
}
// Type of each extracted column
let column_src_ty =
self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
size: rows,
scalar,
})));
// Type of the column after conversion
let column_dst_ty =
self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
size: rows,
scalar: crate::Scalar {
kind,
width: convert,
},
})));
// Extract each column, convert it with `OpFConvert`, and collect the
// converted column ids so the matrix can be rebuilt afterwards.
let mut components = ArrayVec::<Word, 4>::new();
for column in 0..columns as usize {
let column_id = self.gen_id();
block.body.push(Instruction::composite_extract(
column_src_ty,
column_id,
expr_id,
&[column as u32],
));
let column_conv_id = self.gen_id();
block.body.push(Instruction::unary(
spirv::Op::FConvert,
column_dst_ty,
column_conv_id,
column_id,
));
components.push(column_conv_id);
}
let construct_id = self.gen_id();
block.body.push(Instruction::composite_construct(
result_type_id,
construct_id,
&components,
));
return Ok(construct_id);
}
// Scalar and vector sources from here on; anything else is invalid IR.
let (src_scalar, src_size) = match *ty {
crate::TypeInner::Scalar(scalar) => (scalar, None),
crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)),
ref other => {
log::error!("As source {:?}", other);
return Err(Error::Validation("Unexpected Expression::As source"));
}
};
// Shape of the instruction we need to emit for this cast, classified by
// its operand count (identity casts emit nothing at all).
enum Cast {
Identity,
Unary(spirv::Op),
Binary(spirv::Op, Word),
Ternary(spirv::Op, Word, Word),
}
let cast = match (src_scalar.kind, kind, convert) {
// Filter out identity casts. Some Adreno drivers are
// confused by no-op OpBitCast instructions.
(src_kind, kind, convert)
if src_kind == kind
&& convert.filter(|&width| width != src_scalar.width).is_none() =>
{
Cast::Identity
}
(Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
(_, _, None) => Cast::Unary(spirv::Op::Bitcast),
// casting to a bool - generate `OpXxxNotEqual`
(_, Sk::Bool, Some(_)) => {
let op = match src_scalar.kind {
Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
Sk::Float => spirv::Op::FUnordNotEqual,
Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
};
// Compare against a zero constant (splatted to a vector when the
// source is a vector) of the *source* type.
let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?;
let zero_id = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: src_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
self.writer.get_constant_composite(ty, &self.temp_list)
}
None => zero_scalar_id,
};
Cast::Binary(op, zero_id)
}
// casting from a bool - generate `OpSelect`
(Sk::Bool, _, Some(dst_width)) => {
let dst_scalar = crate::Scalar {
kind,
width: dst_width,
};
// `OpSelect` picks 1 when the bool is true and 0 otherwise, with
// the constants built in the *destination* type.
let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?;
let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?;
let (accept_id, reject_id) = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: dst_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list);
self.temp_list.fill(one_scalar_id);
let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list);
(vec1_id, vec0_id)
}
None => (one_scalar_id, zero_scalar_id),
};
Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
}
(Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
(Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
(Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::FConvert)
}
(Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
(Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
// We assume it's either an identity cast, or int-uint.
_ => Cast::Unary(spirv::Op::Bitcast),
};
// Emit the instruction (if any) chosen above and hand back the id that
// holds the converted value.
Ok(match cast {
Cast::Identity => expr_id,
Cast::Unary(op) => {
let id = self.gen_id();
block
.body
.push(Instruction::unary(op, result_type_id, id, expr_id));
id
}
Cast::Binary(op, operand) => {
let id = self.gen_id();
block.body.push(Instruction::binary(
op,
result_type_id,
id,
expr_id,
operand,
));
id
}
Cast::Ternary(op, op1, op2) => {
let id = self.gen_id();
block.body.push(Instruction::ternary(
op,
result_type_id,
id,
expr_id,
op1,
op2,
));
id
}
})
}
/// Build an `OpAccessChain` instruction.
///
/// Emit any needed bounds-checking expressions to `block`.

View File

@@ -405,6 +405,10 @@ impl super::Instruction {
instruction
}
/// Builds an `OpConstant` for a 16-bit value.
///
/// Per the SPIR-V spec, a literal narrower than 32 bits occupies the
/// low-order bits of a single operand word, so `low` is passed through as
/// the one-word literal payload.
pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self {
Self::constant(result_type_id, id, &[low])
}
pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self {
Self::constant(result_type_id, id, &[value])
}

View File

@@ -1154,6 +1154,15 @@ impl Writer {
if bits == 64 {
self.capabilities_used.insert(spirv::Capability::Float64);
}
if bits == 16 {
self.capabilities_used.insert(spirv::Capability::Float16);
self.capabilities_used
.insert(spirv::Capability::StorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::StorageInputOutput16);
}
Instruction::type_float(id, bits)
}
Sk::Bool => Instruction::type_bool(id),
@@ -1222,6 +1231,19 @@ impl Writer {
)?;
self.use_extension("SPV_EXT_shader_atomic_float_add");
}
// 16 bit floating-point support requires Float16 capability
crate::TypeInner::Matrix {
scalar: crate::Scalar::F16,
..
}
| crate::TypeInner::Vector {
scalar: crate::Scalar::F16,
..
}
| crate::TypeInner::Scalar(crate::Scalar::F16) => {
self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?;
self.use_extension("SPV_KHR_16bit_storage");
}
_ => {}
}
Ok(())
@@ -1498,6 +1520,10 @@ impl Writer {
Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32)
}
crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
crate::Literal::F16(value) => {
let low = value.to_bits();
Instruction::constant_16bit(type_id, id, low as u32)
}
crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
crate::Literal::U64(value) => {

View File

@@ -5,6 +5,7 @@ use alloc::{
vec::Vec,
};
use core::fmt::Write;
use hashbrown::HashSet;
use super::Error;
use super::ToWgslIfImplemented as _;
@@ -129,6 +130,9 @@ impl<W: Write> Writer<W> {
// Write all needed directives.
self.write_enable_dual_source_blending_if_needed(module)?;
// Write all `enable` declarations
self.write_enable_declarations(module)?;
// Write all structs
for (handle, ty) in module.types.iter() {
if let TypeInner::Struct { ref members, .. } = ty.inner {
@@ -222,6 +226,41 @@ impl<W: Write> Writer<W> {
Ok(())
}
/// Helper method which writes all the `enable` declarations
/// needed for a module.
///
/// Scans the module's types and emits `enable f16;` (at most once) if any
/// scalar, vector, or matrix type uses the `f16` scalar, followed by a blank
/// line for readability when anything was written.
fn write_enable_declarations(&mut self, module: &Module) -> BackendResult {
    // There is currently only one declaration we can emit (`f16`), so a
    // plain flag is enough to guarantee it is written at most once; the
    // previous single-variant enum + HashSet added an allocation and hash
    // lookups for no benefit.
    let mut wrote_f16 = false;

    // Write all the `enable` declarations
    for (_, ty) in module.types.iter() {
        match ty.inner {
            TypeInner::Scalar(scalar)
            | TypeInner::Vector { scalar, .. }
            | TypeInner::Matrix { scalar, .. } => {
                if scalar == crate::Scalar::F16 && !wrote_f16 {
                    writeln!(self.out, "enable f16;")?;
                    wrote_f16 = true;
                }
            }
            _ => {}
        }
    }

    if wrote_f16 {
        // Empty line for readability
        writeln!(self.out)?;
    }

    Ok(())
}
/// Helper method used to write
/// [functions](https://gpuweb.github.io/gpuweb/wgsl/#functions)
///
@@ -1092,6 +1131,7 @@ impl<W: Write> Writer<W> {
match expressions[expr] {
Expression::Literal(literal) => match literal {
crate::Literal::F16(value) => write!(self.out, "{value}h")?,
crate::Literal::F32(value) => write!(self.out, "{value}f")?,
crate::Literal::U32(value) => write!(self.out, "{value}u")?,
crate::Literal::I32(value) => {

View File

@@ -270,6 +270,7 @@ impl TryToWgsl for crate::Scalar {
Some(match self {
Scalar::F64 => "f64",
Scalar::F32 => "f32",
Scalar::F16 => "f16",
Scalar::I32 => "i32",
Scalar::U32 => "u32",
Scalar::I64 => "i64",

View File

@@ -109,9 +109,15 @@ pub enum ErrorKind {
/// Unsupported matrix of the form matCx2
///
/// Our IR expects matrices of the form matCx2 to have a stride of 8; however,
/// matrices in the std140 layout have a stride of at least 16
#[error("unsupported matrix of the form matCx2 in std140 block layout")]
UnsupportedMatrixTypeInStd140,
/// matrices in the std140 layout have a stride of at least 16.
#[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 },
/// Unsupported matrix of the form f16matCxR
///
/// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count,
/// however matrices in the std140 layout have a stride of at least 16.
#[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 },
/// A variable with the same name already exists in the current scope.
#[error("Variable already declared: {0}")]
VariableAlreadyDeclared(String),

View File

@@ -122,11 +122,25 @@ pub fn calculate_offset(
}
// See comment on the error kind
if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi {
errors.push(Error {
kind: ErrorKind::UnsupportedMatrixTypeInStd140,
meta,
});
if StructLayout::Std140 == layout {
// Do the f16 test first, as it's more specific
if scalar == Scalar::F16 {
errors.push(Error {
kind: ErrorKind::UnsupportedF16MatrixInStd140 {
columns: columns as u8,
rows: rows as u8,
},
meta,
});
}
if rows == crate::VectorSize::Bi {
errors.push(Error {
kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 {
columns: columns as u8,
},
meta,
});
}
}
(align, align * columns as u32)

View File

@@ -12,6 +12,10 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
name: None,
inner: TypeInner::Scalar(Scalar::BOOL),
}),
"float16_t" => Some(Type {
name: None,
inner: TypeInner::Scalar(Scalar::F16),
}),
"float" => Some(Type {
name: None,
inner: TypeInner::Scalar(Scalar::F32),
@@ -42,6 +46,7 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
"i" => Scalar::I32,
"u" => Scalar::U32,
"d" => Scalar::F64,
"f16" => Scalar::F16,
_ => return None,
})
}

View File

@@ -39,6 +39,7 @@ use alloc::{borrow::ToOwned, format, string::String, vec, vec::Vec};
use core::{convert::TryInto, mem, num::NonZeroU32};
use std::path::PathBuf;
use half::f16;
use petgraph::graphmap::GraphMap;
use super::atomic_upgrade::Upgrades;
@@ -82,6 +83,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
"SPV_KHR_vulkan_memory_model",
"SPV_KHR_multiview",
"SPV_EXT_shader_atomic_float_add",
"SPV_KHR_16bit_storage",
];
pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
@@ -5604,6 +5606,9 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
}) => {
let low = self.next()?;
match width {
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal
// If a numeric type's bit width is less than 32 bits, the value appears in the low-order bits of the word.
2 => crate::Literal::F16(f16::from_bits(low as u16)),
4 => crate::Literal::F32(f32::from_bits(low)),
8 => {
inst.expect(5)?;

View File

@@ -153,8 +153,6 @@ pub enum NumberError {
Invalid,
#[error("numeric literal not representable by target type")]
NotRepresentable,
#[error("unimplemented f16 type")]
UnimplementedF16,
}
#[derive(Copy, Clone, Debug, PartialEq)]
@@ -1026,23 +1024,22 @@ impl<'a> Error<'a> {
)],
},
Error::EnableExtensionNotEnabled { kind, span } => ParseError {
message: format!("`{}` enable-extension is not enabled", kind.to_ident()),
message: format!("the `{}` enable extension is not enabled", kind.to_ident()),
labels: vec![(
span,
format!(
concat!(
"the `{}` enable-extension is needed for this functionality, ",
"but it is not currently enabled"
"the `{}` \"Enable Extension\" is needed for this functionality, ",
"but it is not currently enabled."
),
kind.to_ident()
)
.into(),
)],
#[allow(irrefutable_let_patterns)]
notes: if let EnableExtension::Unimplemented(kind) = kind {
vec![format!(
concat!(
"This enable-extension is not yet implemented. ",
"This \"Enable Extension\" is not yet implemented. ",
"Let Naga maintainers know that you ran into this at ",
"<https://github.com/gfx-rs/wgpu/issues/{}>, ",
"so they can prioritize it!"
@@ -1050,7 +1047,12 @@ impl<'a> Error<'a> {
kind.tracking_issue_num()
)]
} else {
vec![]
vec![
format!(
"You can enable this extension by adding `enable {};` at the top of the shader, before any other items.",
kind.to_ident()
),
]
},
},
Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError {

View File

@@ -2013,6 +2013,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
let expr: Typed<crate::Expression> = match *expr {
ast::Expression::Literal(literal) => {
let literal = match literal {
ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f),
ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),

View File

@@ -1,3 +1,6 @@
use crate::front::wgsl::parse::directive::enable_extension::{
EnableExtensions, ImplementedEnableExtension,
};
use crate::front::wgsl::{Error, Result, Scalar};
use crate::Span;
@@ -113,10 +116,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<'_, crate::StorageFo
})
}
pub fn get_scalar_type(word: &str) -> Option<Scalar> {
pub fn get_scalar_type(
enable_extensions: &EnableExtensions,
span: Span,
word: &str,
) -> Result<'static, Option<Scalar>> {
use crate::ScalarKind as Sk;
match word {
// "f16" => Some(Scalar { kind: Sk::Float, width: 2 }),
let scalar = match word {
"f16" => Some(Scalar {
kind: Sk::Float,
width: 2,
}),
"f32" => Some(Scalar {
kind: Sk::Float,
width: 4,
@@ -146,7 +156,18 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
width: crate::BOOL_WIDTH,
}),
_ => None,
};
if matches!(scalar, Some(Scalar::F16))
&& !enable_extensions.contains(ImplementedEnableExtension::F16)
{
return Err(Box::new(Error::EnableExtensionNotEnabled {
span,
kind: ImplementedEnableExtension::F16.into(),
}));
}
Ok(scalar)
}
pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> {

View File

@@ -11,19 +11,23 @@ use alloc::boxed::Box;
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EnableExtensions {
dual_source_blending: bool,
/// Whether `enable f16;` was written earlier in the shader module.
f16: bool,
}
impl EnableExtensions {
pub(crate) const fn empty() -> Self {
Self {
f16: false,
dual_source_blending: false,
}
}
/// Add an enable-extension to the set requested by a module.
pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) {
let field: &mut bool = match ext {
let field = match ext {
ImplementedEnableExtension::DualSourceBlending => &mut self.dual_source_blending,
ImplementedEnableExtension::F16 => &mut self.f16,
};
*field = true;
}
@@ -32,6 +36,7 @@ impl EnableExtensions {
pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool {
match ext {
ImplementedEnableExtension::DualSourceBlending => self.dual_source_blending,
ImplementedEnableExtension::F16 => self.f16,
}
}
}
@@ -47,7 +52,6 @@ impl Default for EnableExtensions {
/// WGSL spec.: <https://www.w3.org/TR/WGSL/#enable-extensions-sec>
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum EnableExtension {
#[allow(unused)]
Implemented(ImplementedEnableExtension),
Unimplemented(UnimplementedEnableExtension),
}
@@ -66,7 +70,7 @@ impl EnableExtension {
/// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible.
pub(crate) fn from_ident(word: &str, span: Span) -> Result<Self> {
Ok(match word {
Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16),
Self::F16 => Self::Implemented(ImplementedEnableExtension::F16),
Self::CLIP_DISTANCES => {
Self::Unimplemented(UnimplementedEnableExtension::ClipDistances)
}
@@ -82,10 +86,9 @@ impl EnableExtension {
match self {
Self::Implemented(kind) => match kind {
ImplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING,
ImplementedEnableExtension::F16 => Self::F16,
},
Self::Unimplemented(kind) => match kind {
UnimplementedEnableExtension::F16 => Self::F16,
UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES,
},
}
@@ -101,17 +104,17 @@ pub enum ImplementedEnableExtension {
///
/// [`enable dual_source_blending;`]: https://www.w3.org/TR/WGSL/#extension-dual_source_blending
DualSourceBlending,
}
/// A variant of [`EnableExtension::Unimplemented`].
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum UnimplementedEnableExtension {
/// Enables `f16`/`half` primitive support in all shader languages.
///
/// In the WGSL standard, this corresponds to [`enable f16;`].
///
/// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16
F16,
}
/// A variant of [`EnableExtension::Unimplemented`].
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum UnimplementedEnableExtension {
/// Enables the `clip_distances` variable in WGSL.
///
/// In the WGSL standard, this corresponds to [`enable clip_distances;`].
@@ -123,7 +126,6 @@ pub enum UnimplementedEnableExtension {
impl UnimplementedEnableExtension {
pub(crate) const fn tracking_issue_num(self) -> u16 {
match self {
Self::F16 => 4384,
Self::ClipDistances => 6236,
}
}

View File

@@ -413,14 +413,17 @@ impl<'a> Lexer<'a> {
/// Parses a generic scalar type, for example `<f32>`.
pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> {
self.expect_generic_paren('<')?;
let pair = match self.next() {
let (scalar, _span) = match self.next() {
(Token::Word(word), span) => {
conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
conv::get_scalar_type(&self.enable_extensions, span, word)?
.map(|scalar| (scalar, span))
.ok_or(Error::UnknownScalarType(span))?
}
(_, span) => Err(Error::UnknownScalarType(span)),
}?;
(_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
};
self.expect_generic_paren('>')?;
Ok(pair)
Ok(scalar)
}
/// Parses a generic scalar type, for example `<f32>`.
@@ -430,14 +433,18 @@ impl<'a> Lexer<'a> {
&mut self,
) -> Result<'a, (Scalar, Span)> {
self.expect_generic_paren('<')?;
let pair = match self.next() {
(Token::Word(word), span) => conv::get_scalar_type(word)
.map(|scalar| (scalar, span))
.ok_or(Error::UnknownScalarType(span)),
(_, span) => Err(Error::UnknownScalarType(span)),
}?;
let (scalar, span) = match self.next() {
(Token::Word(word), span) => {
conv::get_scalar_type(&self.enable_extensions, span, word)?
.map(|scalar| (scalar, span))
.ok_or(Error::UnknownScalarType(span))?
}
(_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
};
self.expect_generic_paren('>')?;
Ok(pair)
Ok((scalar, span))
}
pub(in crate::front::wgsl) fn next_storage_access(
@@ -518,6 +525,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) {
#[test]
fn test_numbers() {
use half::f16;
// WGSL spec examples //
// decimal integer
@@ -542,14 +550,16 @@ fn test_numbers() {
Token::Number(Ok(Number::AbstractFloat(0.01))),
Token::Number(Ok(Number::AbstractFloat(12.34))),
Token::Number(Ok(Number::F32(0.))),
Token::Number(Err(NumberError::UnimplementedF16)),
Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
Token::Number(Ok(Number::AbstractFloat(0.001))),
Token::Number(Ok(Number::AbstractFloat(43.75))),
Token::Number(Ok(Number::F32(16.))),
Token::Number(Ok(Number::AbstractFloat(0.1875))),
Token::Number(Err(NumberError::UnimplementedF16)),
// https://github.com/gfx-rs/wgpu/issues/7046
Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
Token::Number(Ok(Number::AbstractFloat(0.12109375))),
Token::Number(Err(NumberError::UnimplementedF16)),
// https://github.com/gfx-rs/wgpu/issues/7046
Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
],
);

View File

@@ -6,9 +6,7 @@ use crate::diagnostic_filter::{
ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule,
};
use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken};
use crate::front::wgsl::parse::directive::enable_extension::{
EnableExtension, EnableExtensions, UnimplementedEnableExtension,
};
use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions};
use crate::front::wgsl::parse::directive::language_extension::LanguageExtension;
use crate::front::wgsl::parse::directive::DirectiveKind;
use crate::front::wgsl::parse::lexer::{Lexer, Token};
@@ -364,7 +362,7 @@ impl Parser {
span: Span,
ctx: &mut ExpressionContext<'a, '_, '_>,
) -> Result<'a, Option<ast::ConstructorType<'a>>> {
if let Some(scalar) = conv::get_scalar_type(word) {
if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
return Ok(Some(ast::ConstructorType::Scalar(scalar)));
}
@@ -393,6 +391,13 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"vec2h" => {
return Ok(Some(ast::ConstructorType::Vector {
size: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"vec3" => ast::ConstructorType::PartialVector {
size: crate::VectorSize::Tri,
},
@@ -417,6 +422,13 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"vec3h" => {
return Ok(Some(ast::ConstructorType::Vector {
size: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"vec4" => ast::ConstructorType::PartialVector {
size: crate::VectorSize::Quad,
},
@@ -441,6 +453,13 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"vec4h" => {
return Ok(Some(ast::ConstructorType::Vector {
size: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat2x2" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Bi,
@@ -453,6 +472,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat2x2h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat2x3" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Tri,
@@ -465,6 +492,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat2x3h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat2x4" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Quad,
@@ -477,6 +512,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat2x4h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat3x2" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Bi,
@@ -489,6 +532,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat3x2h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat3x3" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Tri,
@@ -501,6 +552,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat3x3h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat3x4" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Quad,
@@ -513,6 +572,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat3x4h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat4x2" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Bi,
@@ -525,6 +592,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat4x2h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat4x3" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Tri,
@@ -537,6 +612,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat4x3h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat4x4" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Quad,
@@ -549,6 +632,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat4x4h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"array" => ast::ConstructorType::PartialArray,
"atomic"
| "binding_array"
@@ -744,15 +835,17 @@ impl Parser {
}
(Token::Number(res), span) => {
let _ = lexer.next();
let num = res.map_err(|err| match err {
super::error::NumberError::UnimplementedF16 => {
Error::EnableExtensionNotEnabled {
kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16),
let num = res.map_err(|err| Error::BadNumber(span, err))?;
if let Some(enable_extension) = num.requires_enable_extension() {
if !lexer.enable_extensions.contains(enable_extension) {
return Err(Box::new(Error::EnableExtensionNotEnabled {
kind: enable_extension.into(),
span,
}
}));
}
err => Error::BadNumber(span, err),
})?;
}
ast::Expression::Literal(ast::Literal::Number(num))
}
(Token::Word("RAY_FLAG_NONE"), _) => {
@@ -1340,9 +1433,10 @@ impl Parser {
&mut self,
lexer: &mut Lexer<'a>,
word: &'a str,
span: Span,
ctx: &mut ExpressionContext<'a, '_, '_>,
) -> Result<'a, Option<ast::Type<'a>>> {
if let Some(scalar) = conv::get_scalar_type(word) {
if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
return Ok(Some(ast::Type::Scalar(scalar)));
}
@@ -1370,6 +1464,11 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"vec2h" => ast::Type::Vector {
size: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"vec3" => {
let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
ast::Type::Vector {
@@ -1393,6 +1492,11 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"vec3h" => ast::Type::Vector {
size: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"vec4" => {
let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
ast::Type::Vector {
@@ -1416,6 +1520,11 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"vec4h" => ast::Type::Vector {
size: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat2x2" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)?
}
@@ -1425,6 +1534,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat2x2h" => ast::Type::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat2x3" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)?
}
@@ -1434,6 +1549,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat2x3h" => ast::Type::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat2x4" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)?
}
@@ -1443,6 +1564,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat2x4h" => ast::Type::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat3x2" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)?
}
@@ -1452,6 +1579,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat3x2h" => ast::Type::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat3x3" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)?
}
@@ -1461,6 +1594,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat3x3h" => ast::Type::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat3x4" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)?
}
@@ -1470,6 +1609,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat3x4h" => ast::Type::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat4x2" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)?
}
@@ -1479,6 +1624,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat4x2h" => ast::Type::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat4x3" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)?
}
@@ -1488,6 +1639,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat4x3h" => ast::Type::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat4x4" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)?
}
@@ -1497,6 +1654,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat4x4h" => ast::Type::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"atomic" => {
let scalar = lexer.next_scalar_generic()?;
ast::Type::Atomic(scalar)
@@ -1763,7 +1926,7 @@ impl Parser {
let (name, span) = lexer.next_ident_with_span()?;
let ty = match this.type_decl_impl(lexer, name, ctx)? {
let ty = match this.type_decl_impl(lexer, name, span, ctx)? {
Some(ty) => ty,
None => {
ctx.unresolved.insert(ast::Dependency {

View File

@@ -1,7 +1,9 @@
use alloc::format;
use crate::front::wgsl::error::NumberError;
use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension;
use crate::front::wgsl::parse::lexer::Token;
use half::f16;
/// When using this type assume no Abstract Int/Float for now
#[derive(Copy, Clone, Debug, PartialEq)]
@@ -18,12 +20,23 @@ pub enum Number {
I64(i64),
/// Concrete u64
U64(u64),
/// Concrete f16
F16(f16),
/// Concrete f32
F32(f32),
/// Concrete f64
F64(f64),
}
impl Number {
    /// Returns the WGSL enable extension that must be active for this
    /// numeric literal to be used, or `None` if no extension is required.
    pub(super) const fn requires_enable_extension(&self) -> Option<ImplementedEnableExtension> {
        // Only `f16` literals are gated behind an extension today.
        if let Number::F16(_) = *self {
            Some(ImplementedEnableExtension::F16)
        } else {
            None
        }
    }
}
pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) {
let (result, rest) = parse(input);
(Token::Number(result), rest)
@@ -369,7 +382,8 @@ fn parse_hex_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
// can only be ParseHexfErrorKind::Inexact but we can't check since it's private
_ => Err(NumberError::NotRepresentable),
},
Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
// TODO: f16 is not supported by hexf_parse
Some(FloatKind::F16) => Err(NumberError::NotRepresentable),
Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) {
Ok(num) => Ok(Number::F32(num)),
// can only be ParseHexfErrorKind::Inexact but we can't check since it's private
@@ -405,7 +419,12 @@ fn parse_dec_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
.then_some(Number::F64(num))
.ok_or(NumberError::NotRepresentable)
}
Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
Some(FloatKind::F16) => {
let num = input.parse::<f16>().unwrap(); // will never fail
num.is_finite()
.then_some(Number::F16(num))
.ok_or(NumberError::NotRepresentable)
}
}
}

View File

@@ -225,6 +225,7 @@ use alloc::{string::String, vec::Vec};
#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
use half::f16;
#[cfg(feature = "deserialize")]
use serde::Deserialize;
#[cfg(feature = "serialize")]
@@ -818,6 +819,8 @@ pub enum Literal {
F64(f64),
/// May not be NaN or infinity.
F32(f32),
/// May not be NaN or infinity.
F16(f16),
U32(u32),
I32(i32),
U64(u64),

View File

@@ -7,6 +7,8 @@ use alloc::{
use core::iter;
use arrayvec::ArrayVec;
use half::f16;
use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero};
use crate::{
arena::{Arena, Handle, HandleVec, UniqueArena},
@@ -208,6 +210,7 @@ gen_component_wise_extractor! {
literals: [
AbstractFloat => AbstractFloat: f64,
F32 => F32: f32,
F16 => F16: f16,
AbstractInt => AbstractInt: i64,
U32 => U32: u32,
I32 => I32: i32,
@@ -228,6 +231,7 @@ gen_component_wise_extractor! {
literals: [
AbstractFloat => Abstract: f64,
F32 => F32: f32,
F16 => F16: f16,
],
scalar_kinds: [
Float,
@@ -253,6 +257,7 @@ gen_component_wise_extractor! {
AbstractFloat => AbstractFloat: f64,
AbstractInt => AbstractInt: i64,
F32 => F32: f32,
F16 => F16: f16,
I32 => I32: i32,
],
scalar_kinds: [
@@ -1114,6 +1119,7 @@ impl<'a> ConstantEvaluator<'a> {
component_wise_scalar(self, span, [arg], |args| match args {
Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])),
Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])),
Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])),
Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
@@ -1145,9 +1151,13 @@ impl<'a> ConstantEvaluator<'a> {
}
)
}
crate::MathFunction::Saturate => {
component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) })
}
crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e {
Float::F16([e]) => Ok(Float::F16(
[e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))],
)),
Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])),
Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])),
}),
// trigonometry
crate::MathFunction::Cos => {
@@ -1201,7 +1211,35 @@ impl<'a> ConstantEvaluator<'a> {
component_wise_float!(self, span, [arg], |e| { Ok([e.floor()]) })
}
crate::MathFunction::Round => {
component_wise_float!(self, span, [arg], |e| { Ok([e.round_ties_even()]) })
component_wise_float(self, span, [arg], |e| match e {
Float::Abstract([e]) => Ok(Float::Abstract([e.round_ties_even()])),
Float::F32([e]) => Ok(Float::F32([e.round_ties_even()])),
Float::F16([e]) => {
// TODO: `round_ties_even` is not available on `half::f16` yet.
//
// This polyfill is shamelessly [~~stolen from~~ inspired by `ndarray-image`][polyfill source],
// which has licensing compatible with ours. See also
// <https://github.com/rust-lang/rust/issues/96710>.
//
// [polyfill source]: https://github.com/imeka/ndarray-ndimage/blob/8b14b4d6ecfbc96a8a052f802e342a7049c68d8f/src/lib.rs#L98
// Round half-to-even polyfill for `f64` (`round_ties_even` landed in std
// only recently; see <https://github.com/rust-lang/rust/issues/96710>).
// Values reaching this point come from `f16`, so the cast to `i64` below
// never saturates in practice.
fn round_ties_even(x: f64) -> f64 {
    let whole = x as i64;
    let frac = (x - whole as f64).abs();
    if frac != 0.5 {
        // Not a tie: plain nearest rounding gives the right answer.
        return x.round();
    }
    // Exact tie: step the magnitude toward the even neighbor, then
    // restore the original sign.
    let magnitude = if whole & 1 == 1 {
        x.abs() + 0.5 // odd whole part, e.g. -1.5, 1.5, 3.5, ...
    } else {
        x.abs() - 0.5 // even whole part, e.g. -2.5, 0.5, 2.5, ...
    };
    magnitude.copysign(x)
}
Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))]))
}
})
}
crate::MathFunction::Fract => {
component_wise_float!(self, span, [arg], |e| {
@@ -1246,15 +1284,27 @@ impl<'a> ConstantEvaluator<'a> {
)
}
crate::MathFunction::Step => {
component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| {
Ok([if edge <= x { 1.0 } else { 0.0 }])
component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x {
Float::Abstract([edge, x]) => {
Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }]))
}
Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])),
Float::F16([edge, x]) => Ok(Float::F16([if edge <= x {
f16::one()
} else {
f16::zero()
}])),
})
}
crate::MathFunction::Sqrt => {
component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) })
}
crate::MathFunction::InverseSqrt => {
component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) })
component_wise_float(self, span, [arg], |e| match e {
Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])),
Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])),
Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])),
})
}
// bits
@@ -1549,6 +1599,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => v,
Literal::U32(v) => v as i32,
Literal::F32(v) => v as i32,
Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf
Literal::Bool(v) => v as i32,
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
@@ -1560,6 +1611,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => v as u32,
Literal::U32(v) => v,
Literal::F32(v) => v as u32,
Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf
Literal::Bool(v) => v as u32,
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
@@ -1575,6 +1627,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F64(v) => v as i64,
Literal::I64(v) => v,
Literal::U64(v) => v as i64,
Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf
Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
}),
@@ -1586,9 +1639,22 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F64(v) => v as u64,
Literal::I64(v) => v as u64,
Literal::U64(v) => v,
Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf
Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
}),
Sc::F16 => Literal::F16(match literal {
Literal::F16(v) => v,
Literal::F32(v) => f16::from_f32(v),
Literal::F64(v) => f16::from_f64(v),
Literal::Bool(v) => f16::from_u32(v as u32).unwrap(),
Literal::I64(v) => f16::from_i64(v).unwrap(),
Literal::U64(v) => f16::from_u64(v).unwrap(),
Literal::I32(v) => f16::from_i32(v).unwrap(),
Literal::U32(v) => f16::from_u32(v).unwrap(),
Literal::AbstractFloat(v) => f16::try_from_abstract(v)?,
Literal::AbstractInt(v) => f16::try_from_abstract(v)?,
}),
Sc::F32 => Literal::F32(match literal {
Literal::I32(v) => v as f32,
Literal::U32(v) => v as f32,
@@ -1597,12 +1663,14 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
}
Literal::F16(v) => f16::to_f32(v),
Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
Literal::AbstractFloat(v) => f32::try_from_abstract(v)?,
}),
Sc::F64 => Literal::F64(match literal {
Literal::I32(v) => v as f64,
Literal::U32(v) => v as f64,
Literal::F16(v) => f16::to_f64(v),
Literal::F32(v) => v as f64,
Literal::F64(v) => v,
Literal::Bool(v) => v as u32 as f64,
@@ -1614,6 +1682,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => v != 0,
Literal::U32(v) => v != 0,
Literal::F32(v) => v != 0.0,
Literal::F16(v) => v != f16::zero(),
Literal::Bool(v) => v,
Literal::F64(_)
| Literal::I64(_)
@@ -1770,6 +1839,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => Literal::I32(v.wrapping_neg()),
Literal::I64(v) => Literal::I64(v.wrapping_neg()),
Literal::F32(v) => Literal::F32(-v),
Literal::F16(v) => Literal::F16(-v),
Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()),
Literal::AbstractFloat(v) => Literal::AbstractFloat(-v),
_ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg),
@@ -1918,6 +1988,14 @@ impl<'a> ConstantEvaluator<'a> {
_ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
})
}
(Literal::F16(a), Literal::F16(b)) => Literal::F16(match op {
BinaryOperator::Add => a + b,
BinaryOperator::Subtract => a - b,
BinaryOperator::Multiply => a * b,
BinaryOperator::Divide => a / b,
BinaryOperator::Modulo => a % b,
_ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
}),
(Literal::AbstractInt(a), Literal::AbstractInt(b)) => {
Literal::AbstractInt(match op {
BinaryOperator::Add => a.checked_add(b).ok_or_else(|| {
@@ -2522,6 +2600,32 @@ impl TryFromAbstract<f64> for u64 {
}
}
impl TryFromAbstract<f64> for f16 {
    /// Converts an abstract float to `f16`, rejecting values whose
    /// magnitude overflows to infinity in half precision.
    fn try_from_abstract(value: f64) -> Result<f16, ConstantEvaluatorError> {
        let converted = f16::from_f64(value);
        if converted.is_infinite() {
            Err(ConstantEvaluatorError::AutomaticConversionLossy {
                value: format!("{value:?}"),
                to_type: "f16",
            })
        } else {
            Ok(converted)
        }
    }
}
impl TryFromAbstract<i64> for f16 {
    /// Converts an abstract int to `f16`, failing when `f16::from_i64`
    /// reports the value as unconvertible (returns `None`).
    fn try_from_abstract(value: i64) -> Result<f16, ConstantEvaluatorError> {
        // `ok_or_else` replaces the previous `is_none()` check followed by
        // `unwrap()` (Clippy `unnecessary_unwrap`); behavior is unchanged.
        f16::from_i64(value).ok_or_else(|| ConstantEvaluatorError::AutomaticConversionLossy {
            value: format!("{value:?}"),
            to_type: "f16",
        })
    }
}
#[cfg(test)]
mod tests {
use alloc::{vec, vec::Vec};

View File

@@ -80,6 +80,7 @@ impl From<super::StorageFormat> for super::Scalar {
pub enum HashableLiteral {
F64(u64),
F32(u32),
F16(u16),
U32(u32),
I32(i32),
U64(u64),
@@ -94,6 +95,7 @@ impl From<crate::Literal> for HashableLiteral {
match l {
crate::Literal::F64(v) => Self::F64(v.to_bits()),
crate::Literal::F32(v) => Self::F32(v.to_bits()),
crate::Literal::F16(v) => Self::F16(v.to_bits()),
crate::Literal::U32(v) => Self::U32(v),
crate::Literal::I32(v) => Self::I32(v),
crate::Literal::U64(v) => Self::U64(v),
@@ -132,6 +134,7 @@ impl crate::Literal {
match *self {
Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
Self::F16(_) => 2,
Self::Bool(_) => crate::BOOL_WIDTH,
Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
}
@@ -140,6 +143,7 @@ impl crate::Literal {
match *self {
Self::F64(_) => crate::Scalar::F64,
Self::F32(_) => crate::Scalar::F32,
Self::F16(_) => crate::Scalar::F16,
Self::U32(_) => crate::Scalar::U32,
Self::I32(_) => crate::Scalar::I32,
Self::U64(_) => crate::Scalar::U64,

View File

@@ -20,6 +20,10 @@ impl crate::ScalarKind {
}
impl crate::Scalar {
pub const F16: Self = Self {
kind: crate::ScalarKind::Float,
width: 2,
};
pub const I32: Self = Self {
kind: crate::ScalarKind::Sint,
width: 4,

View File

@@ -1794,7 +1794,7 @@ impl super::Validator {
}
pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> {
self.check_width(literal.scalar())?;
let _ = self.check_width(literal.scalar())?;
check_literal_value(literal)?;
Ok(())

View File

@@ -4,7 +4,7 @@ use bit_set::BitSet;
use super::{
analyzer::{FunctionInfo, GlobalUse},
Capabilities, Disalignment, FunctionError, ModuleInfo,
Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError,
};
use crate::arena::{Handle, UniqueArena};
use crate::span::{AddSpan as _, MapErrWithSpan as _, SpanProvider as _, WithSpan};
@@ -41,6 +41,8 @@ pub enum GlobalVariableError {
InitializerNotAllowed(crate::AddressSpace),
#[error("Storage address space doesn't support write-only access")]
StorageAddressSpaceWriteOnlyNotSupported,
#[error("Type is not valid for use as a push constant")]
InvalidPushConstantType(#[source] PushConstantError),
}
#[derive(Clone, Debug, thiserror::Error)]
@@ -596,6 +598,9 @@ impl super::Validator {
Capabilities::PUSH_CONSTANT,
));
}
if let Err(ref err) = type_info.push_constant_compatibility {
return Err(GlobalVariableError::InvalidPushConstantType(err.clone()));
}
(
TypeFlags::DATA
| TypeFlags::COPY

View File

@@ -31,7 +31,7 @@ pub use expression::{check_literal_value, LiteralError};
pub use expression::{ConstExpressionError, ExpressionError};
pub use function::{CallError, FunctionError, LocalVariableError};
pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError};
use self::handles::InvalidHandleError;
@@ -163,6 +163,8 @@ bitflags::bitflags! {
const TEXTURE_INT64_ATOMIC = 1 << 24;
/// Support for ray queries returning vertex position
const RAY_HIT_VERTEX_POSITION = 1 << 25;
/// Support for 16-bit floating-point types.
const SHADER_FLOAT16 = 1 << 26;
}
}

View File

@@ -169,8 +169,16 @@ pub enum WidthError {
Abstract,
}
/// Reasons a type may not be used in the push-constant address space.
#[derive(Clone, Debug, thiserror::Error)]
#[cfg_attr(test, derive(PartialEq))]
pub enum PushConstantError {
    #[error("The scalar type {0:?} is not supported in push constants")]
    InvalidScalar(crate::Scalar),
}
// Only makes sense if `flags.contains(HOST_SHAREABLE)`
type LayoutCompatibility = Result<Alignment, (Handle<crate::Type>, Disalignment)>;
type PushConstantCompatibility = Result<(), PushConstantError>;
fn check_member_layout(
accum: &mut LayoutCompatibility,
@@ -221,6 +229,7 @@ pub(super) struct TypeInfo {
pub flags: TypeFlags,
pub uniform_layout: LayoutCompatibility,
pub storage_layout: LayoutCompatibility,
pub push_constant_compatibility: PushConstantCompatibility,
}
impl TypeInfo {
@@ -229,6 +238,7 @@ impl TypeInfo {
flags: TypeFlags::empty(),
uniform_layout: Ok(Alignment::ONE),
storage_layout: Ok(Alignment::ONE),
push_constant_compatibility: Ok(()),
}
}
@@ -237,6 +247,7 @@ impl TypeInfo {
flags,
uniform_layout: Ok(alignment),
storage_layout: Ok(alignment),
push_constant_compatibility: Ok(()),
}
}
}
@@ -250,11 +261,15 @@ impl super::Validator {
}
}
pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> {
pub(super) const fn check_width(
&self,
scalar: crate::Scalar,
) -> Result<PushConstantCompatibility, WidthError> {
let mut push_constant_compatibility = Ok(());
let good = match scalar.kind {
crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH,
crate::ScalarKind::Float => {
if scalar.width == 8 {
crate::ScalarKind::Float => match scalar.width {
8 => {
if !self.capabilities.contains(Capabilities::FLOAT64) {
return Err(WidthError::MissingCapability {
name: "f64",
@@ -262,10 +277,21 @@ impl super::Validator {
});
}
true
} else {
scalar.width == 4
}
}
2 => {
if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) {
return Err(WidthError::MissingCapability {
name: "f16",
flag: "FLOAT16",
});
}
push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar));
true
}
_ => scalar.width == 4,
},
crate::ScalarKind::Sint => {
if scalar.width == 8 {
if !self.capabilities.contains(Capabilities::SHADER_INT64) {
@@ -297,7 +323,7 @@ impl super::Validator {
}
};
if good {
Ok(())
Ok(push_constant_compatibility)
} else {
Err(WidthError::Invalid(scalar.kind, scalar.width))
}
@@ -317,13 +343,13 @@ impl super::Validator {
use crate::TypeInner as Ti;
Ok(match gctx.types[handle].inner {
Ti::Scalar(scalar) => {
self.check_width(scalar)?;
let push_constant_compatibility = self.check_width(scalar)?;
let shareable = if scalar.kind.is_numeric() {
TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
} else {
TypeFlags::empty()
};
TypeInfo::new(
let mut type_info = TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::COPY
@@ -332,16 +358,18 @@ impl super::Validator {
| TypeFlags::CREATION_RESOLVED
| shareable,
Alignment::from_width(scalar.width),
)
);
type_info.push_constant_compatibility = push_constant_compatibility;
type_info
}
Ti::Vector { size, scalar } => {
self.check_width(scalar)?;
let push_constant_compatibility = self.check_width(scalar)?;
let shareable = if scalar.kind.is_numeric() {
TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
} else {
TypeFlags::empty()
};
TypeInfo::new(
let mut type_info = TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::COPY
@@ -350,7 +378,9 @@ impl super::Validator {
| TypeFlags::CREATION_RESOLVED
| shareable,
Alignment::from(size) * Alignment::from_width(scalar.width),
)
);
type_info.push_constant_compatibility = push_constant_compatibility;
type_info
}
Ti::Matrix {
columns: _,
@@ -360,8 +390,8 @@ impl super::Validator {
if scalar.kind != crate::ScalarKind::Float {
return Err(TypeError::MatrixElementNotFloat);
}
self.check_width(scalar)?;
TypeInfo::new(
let push_constant_compatibility = self.check_width(scalar)?;
let mut type_info = TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::COPY
@@ -370,7 +400,9 @@ impl super::Validator {
| TypeFlags::CONSTRUCTIBLE
| TypeFlags::CREATION_RESOLVED,
Alignment::from(rows) * Alignment::from_width(scalar.width),
)
);
type_info.push_constant_compatibility = push_constant_compatibility;
type_info
}
Ti::Atomic(scalar) => {
match scalar {
@@ -465,7 +497,7 @@ impl super::Validator {
// However, some cases are trivial: All our implicit base types
// are DATA and SIZED, so we can never return
// `InvalidPointerBase` or `InvalidPointerToUnsized`.
self.check_width(scalar)?;
let _ = self.check_width(scalar)?;
// `Validator::validate_function` actually checks the address
// space of pointer arguments explicitly before checking the
@@ -551,6 +583,7 @@ impl super::Validator {
flags: base_info.flags & type_info_mask,
uniform_layout,
storage_layout,
push_constant_compatibility: base_info.push_constant_compatibility.clone(),
}
}
Ti::Struct { ref members, span } => {
@@ -631,6 +664,10 @@ impl super::Validator {
base_info.storage_layout,
handle,
);
if base_info.push_constant_compatibility.is_err() {
ti.push_constant_compatibility =
base_info.push_constant_compatibility.clone();
}
// Validate rule: If a structure member itself has a structure type S,
// then the number of bytes between the start of that member and

View File

@@ -0,0 +1,57 @@
#version 460
#extension GL_AMD_gpu_shader_half_float: enable
layout(set = 0, binding = 0) uniform A {
float16_t a_1;
f16vec2 a_vec2;
f16vec3 a_vec3;
f16vec4 a_vec4;
// So the rules here are particularly nasty for any f16 matrices in uniform buffers
// as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
// buffer is over-aligned to what naga-ir wants.
//
// This is https://github.com/gfx-rs/wgpu/issues/4375.
// f16mat2 a_mat2;
// f16mat2x3 a_mat2x3;
// f16mat2x4 a_mat2x4;
// f16mat3x2 a_mat3x2;
// f16mat3 a_mat3;
// f16mat3x4 a_mat3x4;
// f16mat4x2 a_mat4x2;
// f16mat4x3 a_mat4x3;
// f16mat4 a_mat4;
};
layout(set = 0, binding = 1) buffer B {
float16_t b_1;
f16vec2 b_vec2;
f16vec3 b_vec3;
f16vec4 b_vec4;
f16mat2 b_mat2;
f16mat2x3 b_mat2x3;
f16mat2x4 b_mat2x4;
f16mat3x2 b_mat3x2;
f16mat3 b_mat3;
f16mat3x4 b_mat3x4;
f16mat4x2 b_mat4x2;
f16mat4x3 b_mat4x3;
f16mat4 b_mat4;
};
// Copies every enabled f16 member of uniform block A into the matching
// member of storage buffer B, exercising f16 scalar and vector loads/stores.
// NOTE(review): this is a snapshot-test fixture — keep statements unchanged.
void main() {
    b_1 = a_1;
    b_vec2 = a_vec2;
    b_vec3 = a_vec3;
    b_vec4 = a_vec4;
    // Matrix copies stay disabled; see the stride note on block A above
    // (https://github.com/gfx-rs/wgpu/issues/4375).
    // b_mat2 = a_mat2;
    // b_mat2x3 = a_mat2x3;
    // b_mat2x4 = a_mat2x4;
    // b_mat3x2 = a_mat3x2;
    // b_mat3 = a_mat3;
    // b_mat3x4 = a_mat3x4;
    // b_mat4x2 = a_mat4x2;
    // b_mat4x3 = a_mat4x3;
    // b_mat4 = a_mat4;
}

View File

@@ -0,0 +1 @@
god_mode = true

View File

@@ -0,0 +1,57 @@
#version 460
#extension GL_AMD_gpu_shader_half_float: enable
layout(set = 0, binding = 0) uniform A {
float16_t a_1;
f16vec2 a_vec2;
f16vec3 a_vec3;
f16vec4 a_vec4;
// So the rules here are particularly nasty for any f16 matrices in uniform buffers
// as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
// buffer is over-aligned to what naga-ir wants.
//
// This is https://github.com/gfx-rs/wgpu/issues/4375.
// f16mat2 a_mat2;
// f16mat2x3 a_mat2x3;
// f16mat2x4 a_mat2x4;
// f16mat3x2 a_mat3x2;
// f16mat3 a_mat3;
// f16mat3x4 a_mat3x4;
// f16mat4x2 a_mat4x2;
// f16mat4x3 a_mat4x3;
// f16mat4 a_mat4;
};
layout(set = 0, binding = 1) buffer B {
float16_t b_1;
f16vec2 b_vec2;
f16vec3 b_vec3;
f16vec4 b_vec4;
f16mat2 b_mat2;
f16mat2x3 b_mat2x3;
f16mat2x4 b_mat2x4;
f16mat3x2 b_mat3x2;
f16mat3 b_mat3;
f16mat3x4 b_mat3x4;
f16mat4x2 b_mat4x2;
f16mat4x3 b_mat4x3;
f16mat4 b_mat4;
};
// Copies every enabled f16 member of uniform block A into the matching
// member of storage buffer B, exercising f16 scalar and vector loads/stores.
// NOTE(review): this is a snapshot-test fixture — keep statements unchanged.
void main() {
    b_1 = a_1;
    b_vec2 = a_vec2;
    b_vec3 = a_vec3;
    b_vec4 = a_vec4;
    // Matrix copies stay disabled; see the stride note on block A above
    // (https://github.com/gfx-rs/wgpu/issues/4375).
    // b_mat2 = a_mat2;
    // b_mat2x3 = a_mat2x3;
    // b_mat2x4 = a_mat2x4;
    // b_mat3x2 = a_mat3x2;
    // b_mat3 = a_mat3;
    // b_mat3x4 = a_mat3x4;
    // b_mat4x2 = a_mat4x2;
    // b_mat4x3 = a_mat4x3;
    // b_mat4 = a_mat4;
}

View File

@@ -0,0 +1,130 @@
; SPIR-V
; Version: 1.0
; Generator: Google Shaderc over Glslang; 11
; Bound: 46
; Schema: 0
OpCapability Shader
OpCapability StorageBuffer16BitAccess
OpCapability UniformAndStorageBuffer16BitAccess
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 460
OpSourceExtension "GL_AMD_gpu_shader_half_float"
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpName %main "main"
OpName %B "B"
OpMemberName %B 0 "b_1"
OpMemberName %B 1 "b_vec2"
OpMemberName %B 2 "b_vec3"
OpMemberName %B 3 "b_vec4"
OpMemberName %B 4 "b_mat2"
OpMemberName %B 5 "b_mat2x3"
OpMemberName %B 6 "b_mat2x4"
OpMemberName %B 7 "b_mat3x2"
OpMemberName %B 8 "b_mat3"
OpMemberName %B 9 "b_mat3x4"
OpMemberName %B 10 "b_mat4x2"
OpMemberName %B 11 "b_mat4x3"
OpMemberName %B 12 "b_mat4"
OpName %_ ""
OpName %A "A"
OpMemberName %A 0 "a_1"
OpMemberName %A 1 "a_vec2"
OpMemberName %A 2 "a_vec3"
OpMemberName %A 3 "a_vec4"
OpName %__0 ""
OpDecorate %B BufferBlock
OpMemberDecorate %B 0 Offset 0
OpMemberDecorate %B 1 Offset 4
OpMemberDecorate %B 2 Offset 8
OpMemberDecorate %B 3 Offset 16
OpMemberDecorate %B 4 ColMajor
OpMemberDecorate %B 4 MatrixStride 4
OpMemberDecorate %B 4 Offset 24
OpMemberDecorate %B 5 ColMajor
OpMemberDecorate %B 5 MatrixStride 8
OpMemberDecorate %B 5 Offset 32
OpMemberDecorate %B 6 ColMajor
OpMemberDecorate %B 6 MatrixStride 8
OpMemberDecorate %B 6 Offset 48
OpMemberDecorate %B 7 ColMajor
OpMemberDecorate %B 7 MatrixStride 4
OpMemberDecorate %B 7 Offset 64
OpMemberDecorate %B 8 ColMajor
OpMemberDecorate %B 8 MatrixStride 8
OpMemberDecorate %B 8 Offset 80
OpMemberDecorate %B 9 ColMajor
OpMemberDecorate %B 9 MatrixStride 8
OpMemberDecorate %B 9 Offset 104
OpMemberDecorate %B 10 ColMajor
OpMemberDecorate %B 10 MatrixStride 4
OpMemberDecorate %B 10 Offset 128
OpMemberDecorate %B 11 ColMajor
OpMemberDecorate %B 11 MatrixStride 8
OpMemberDecorate %B 11 Offset 144
OpMemberDecorate %B 12 ColMajor
OpMemberDecorate %B 12 MatrixStride 8
OpMemberDecorate %B 12 Offset 176
OpDecorate %_ Binding 1
OpDecorate %_ DescriptorSet 0
OpDecorate %A Block
OpMemberDecorate %A 0 Offset 0
OpMemberDecorate %A 1 Offset 4
OpMemberDecorate %A 2 Offset 8
OpMemberDecorate %A 3 Offset 16
OpDecorate %__0 Binding 0
OpDecorate %__0 DescriptorSet 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%half = OpTypeFloat 16
%v2half = OpTypeVector %half 2
%v3half = OpTypeVector %half 3
%v4half = OpTypeVector %half 4
%mat2v2half = OpTypeMatrix %v2half 2
%mat2v3half = OpTypeMatrix %v3half 2
%mat2v4half = OpTypeMatrix %v4half 2
%mat3v2half = OpTypeMatrix %v2half 3
%mat3v3half = OpTypeMatrix %v3half 3
%mat3v4half = OpTypeMatrix %v4half 3
%mat4v2half = OpTypeMatrix %v2half 4
%mat4v3half = OpTypeMatrix %v3half 4
%mat4v4half = OpTypeMatrix %v4half 4
%B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half
%_ptr_Uniform_B = OpTypePointer Uniform %B
%_ = OpVariable %_ptr_Uniform_B Uniform
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%A = OpTypeStruct %half %v2half %v3half %v4half
%_ptr_Uniform_A = OpTypePointer Uniform %A
%__0 = OpVariable %_ptr_Uniform_A Uniform
%_ptr_Uniform_half = OpTypePointer Uniform %half
%int_1 = OpConstant %int 1
%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
%int_2 = OpConstant %int 2
%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
%int_3 = OpConstant %int 3
%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
%main = OpFunction %void None %3
%5 = OpLabel
%28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0
%29 = OpLoad %half %28
%30 = OpAccessChain %_ptr_Uniform_half %_ %int_0
OpStore %30 %29
%33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1
%34 = OpLoad %v2half %33
%35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1
OpStore %35 %34
%38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2
%39 = OpLoad %v3half %38
%40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2
OpStore %40 %39
%43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3
%44 = OpLoad %v4half %43
%45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3
OpStore %45 %44
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1 @@
god_mode = true

View File

@@ -1,6 +1,6 @@
struct PushConstants {
index: u32,
double: vec2<f64>,
double: vec2<f32>,
}
var<push_constant> pc: PushConstants;

View File

@@ -0,0 +1,15 @@
# No GLSL support for f16
targets = "SPIRV | METAL | HLSL | WGSL"
god_mode = true
[spv]
version = [1, 0]
[hlsl]
shader_model = "V6_2"
special_constants_binding = { space = 1, register = 0 }
push_constants_target = { space = 0, register = 0 }
lang_version = [1, 0]
per_entry_point_map = {}
zero_initialize_workgroup_memory = true

127
naga/tests/in/wgsl/f16.wgsl Normal file
View File

@@ -0,0 +1,127 @@
enable f16;
var<private> private_variable: f16 = 1h;
const constant_variable: f16 = f16(15.2);
struct UniformCompatible {
// Other types
val_u32: u32,
val_i32: i32,
val_f32: f32,
// f16
val_f16: f16,
val_f16_2: vec2<f16>,
val_f16_3: vec3<f16>,
val_f16_4: vec4<f16>,
final_value: f16,
val_mat2x2: mat2x2<f16>,
val_mat2x3: mat2x3<f16>,
val_mat2x4: mat2x4<f16>,
val_mat3x2: mat3x2<f16>,
val_mat3x3: mat3x3<f16>,
val_mat3x4: mat3x4<f16>,
val_mat4x2: mat4x2<f16>,
val_mat4x3: mat4x3<f16>,
val_mat4x4: mat4x4<f16>,
}
struct StorageCompatible {
val_f16_array_2: array<f16, 2>,
}
struct LayoutTest {
scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32
}
@group(0) @binding(0)
var<uniform> input_uniform: UniformCompatible;
@group(0) @binding(1)
var<storage> input_storage: UniformCompatible;
@group(0) @binding(2)
var<storage> input_arrays: StorageCompatible;
@group(0) @binding(3)
var<storage, read_write> output: UniformCompatible;
@group(0) @binding(4)
var<storage, read_write> output_arrays: StorageCompatible;
fn f16_function(x: f16) -> f16 {
var val: f16 = f16(constant_variable);
// A number too big for f16
val += 1h - 33333h;
    // Constructing an f16 from an AbstractFloat
val += val + f16(5.);
// Constructing a f16 from other types and other types from f16.
val += f16(input_uniform.val_f32 + f32(val));
    // Constructing a vec3<f16> from an f16
val += vec3<f16>(input_uniform.val_f16).z;
// Reading/writing to a uniform/storage buffer
output.val_f16 = input_uniform.val_f16 + input_storage.val_f16;
output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2;
output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3;
output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4;
output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2;
output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3;
output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4;
output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2;
output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3;
output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4;
output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2;
output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3;
output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4;
output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2;
// We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
// constants based on the size of the type, and which are arguments.
// Numeric functions
val += abs(val);
val += clamp(val, val, val);
val += dot(vec2(val), vec2(val));
val += max(val, val);
val += min(val, val);
val += sign(val);
val += f16(1.0);
// We use the shorthand aliases here to ensure the aliases
// work correctly.
// Cast vectors to/from f32
let float_vec2 = vec2f(input_uniform.val_f16_2);
output.val_f16_2 = vec2h(float_vec2);
let float_vec3 = vec3f(input_uniform.val_f16_3);
output.val_f16_3 = vec3h(float_vec3);
let float_vec4 = vec4f(input_uniform.val_f16_4);
output.val_f16_4 = vec4h(float_vec4);
// Cast matrices to/from f32
output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2));
output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3));
output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4));
output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2));
output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3));
output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4));
output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2));
output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3));
output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4));
// Make sure all the variables are used.
return val;
}
@compute @workgroup_size(1)
fn main() {
output.final_value = f16_function(2h);
}

View File

@@ -0,0 +1,351 @@
struct NagaConstants {
int first_vertex;
int first_instance;
uint other;
};
ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
struct UniformCompatible {
uint val_u32_;
int val_i32_;
float val_f32_;
half val_f16_;
half2 val_f16_2_;
int _pad5_0;
half3 val_f16_3_;
half4 val_f16_4_;
half final_value;
half2 val_mat2x2__0; half2 val_mat2x2__1;
int _pad9_0;
row_major half2x3 val_mat2x3_;
row_major half2x4 val_mat2x4_;
half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2;
int _pad12_0;
row_major half3x3 val_mat3x3_;
row_major half3x4 val_mat3x4_;
half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3;
row_major half4x3 val_mat4x3_;
row_major half4x4 val_mat4x4_;
};
struct StorageCompatible {
half val_f16_array_2_[2];
};
struct LayoutTest {
half scalar1_;
half scalar2_;
int _pad2_0;
half3 v3_;
half tuck_in;
half scalar4_;
uint larger;
};
static const half constant_variable = 15.203125h;
static half private_variable = 1.0h;
cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
ByteAddressBuffer input_storage : register(t1);
ByteAddressBuffer input_arrays : register(t2);
RWByteAddressBuffer output : register(u3);
RWByteAddressBuffer output_arrays : register(u4);
half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) {
return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1);
}
void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) {
obj.val_mat2x2__0 = mat[0];
obj.val_mat2x2__1 = mat[1];
}
void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
switch(mat_idx) {
case 0: { obj.val_mat2x2__0 = vec; break; }
case 1: { obj.val_mat2x2__1 = vec; break; }
}
}
void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
switch(mat_idx) {
case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; }
case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; }
}
}
half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) {
return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2);
}
void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) {
obj.val_mat3x2__0 = mat[0];
obj.val_mat3x2__1 = mat[1];
obj.val_mat3x2__2 = mat[2];
}
void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
switch(mat_idx) {
case 0: { obj.val_mat3x2__0 = vec; break; }
case 1: { obj.val_mat3x2__1 = vec; break; }
case 2: { obj.val_mat3x2__2 = vec; break; }
}
}
void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
switch(mat_idx) {
case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; }
case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; }
case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; }
}
}
half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) {
return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3);
}
void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) {
obj.val_mat4x2__0 = mat[0];
obj.val_mat4x2__1 = mat[1];
obj.val_mat4x2__2 = mat[2];
obj.val_mat4x2__3 = mat[3];
}
void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
switch(mat_idx) {
case 0: { obj.val_mat4x2__0 = vec; break; }
case 1: { obj.val_mat4x2__1 = vec; break; }
case 2: { obj.val_mat4x2__2 = vec; break; }
case 3: { obj.val_mat4x2__3 = vec; break; }
}
}
void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
switch(mat_idx) {
case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; }
case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; }
case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; }
case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; }
}
}
typedef half ret_Constructarray2_half_[2];
ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) {
half ret[2] = { arg0, arg1 };
return ret;
}
half f16_function(half x)
{
half val = 15.203125h;
half _e4 = val;
val = (_e4 + -33344.0h);
half _e6 = val;
half _e9 = val;
val = (_e9 + (_e6 + 5.0h));
float _e13 = input_uniform.val_f32_;
half _e14 = val;
half _e18 = val;
val = (_e18 + half((_e13 + float(_e14))));
half _e22 = input_uniform.val_f16_;
half _e25 = val;
val = (_e25 + (_e22).xxx.z);
half _e31 = input_uniform.val_f16_;
half _e34 = input_storage.Load<half>(12);
output.Store(12, (_e31 + _e34));
half2 _e40 = input_uniform.val_f16_2_;
half2 _e43 = input_storage.Load<half2>(16);
output.Store(16, (_e40 + _e43));
half3 _e49 = input_uniform.val_f16_3_;
half3 _e52 = input_storage.Load<half3>(24);
output.Store(24, (_e49 + _e52));
half4 _e58 = input_uniform.val_f16_4_;
half4 _e61 = input_storage.Load<half4>(32);
output.Store(32, (_e58 + _e61));
half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
half2x2 _e70 = half2x2(input_storage.Load<half2>(44+0), input_storage.Load<half2>(44+4));
{
half2x2 _value2 = (_e67 + _e70);
output.Store(44+0, _value2[0]);
output.Store(44+4, _value2[1]);
}
half2x3 _e76 = input_uniform.val_mat2x3_;
half2x3 _e79 = half2x3(input_storage.Load<half3>(56+0), input_storage.Load<half3>(56+8));
{
half2x3 _value2 = (_e76 + _e79);
output.Store(56+0, _value2[0]);
output.Store(56+8, _value2[1]);
}
half2x4 _e85 = input_uniform.val_mat2x4_;
half2x4 _e88 = half2x4(input_storage.Load<half4>(72+0), input_storage.Load<half4>(72+8));
{
half2x4 _value2 = (_e85 + _e88);
output.Store(72+0, _value2[0]);
output.Store(72+8, _value2[1]);
}
half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
half3x2 _e97 = half3x2(input_storage.Load<half2>(88+0), input_storage.Load<half2>(88+4), input_storage.Load<half2>(88+8));
{
half3x2 _value2 = (_e94 + _e97);
output.Store(88+0, _value2[0]);
output.Store(88+4, _value2[1]);
output.Store(88+8, _value2[2]);
}
half3x3 _e103 = input_uniform.val_mat3x3_;
half3x3 _e106 = half3x3(input_storage.Load<half3>(104+0), input_storage.Load<half3>(104+8), input_storage.Load<half3>(104+16));
{
half3x3 _value2 = (_e103 + _e106);
output.Store(104+0, _value2[0]);
output.Store(104+8, _value2[1]);
output.Store(104+16, _value2[2]);
}
half3x4 _e112 = input_uniform.val_mat3x4_;
half3x4 _e115 = half3x4(input_storage.Load<half4>(128+0), input_storage.Load<half4>(128+8), input_storage.Load<half4>(128+16));
{
half3x4 _value2 = (_e112 + _e115);
output.Store(128+0, _value2[0]);
output.Store(128+8, _value2[1]);
output.Store(128+16, _value2[2]);
}
half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
half4x2 _e124 = half4x2(input_storage.Load<half2>(152+0), input_storage.Load<half2>(152+4), input_storage.Load<half2>(152+8), input_storage.Load<half2>(152+12));
{
half4x2 _value2 = (_e121 + _e124);
output.Store(152+0, _value2[0]);
output.Store(152+4, _value2[1]);
output.Store(152+8, _value2[2]);
output.Store(152+12, _value2[3]);
}
half4x3 _e130 = input_uniform.val_mat4x3_;
half4x3 _e133 = half4x3(input_storage.Load<half3>(168+0), input_storage.Load<half3>(168+8), input_storage.Load<half3>(168+16), input_storage.Load<half3>(168+24));
{
half4x3 _value2 = (_e130 + _e133);
output.Store(168+0, _value2[0]);
output.Store(168+8, _value2[1]);
output.Store(168+16, _value2[2]);
output.Store(168+24, _value2[3]);
}
half4x4 _e139 = input_uniform.val_mat4x4_;
half4x4 _e142 = half4x4(input_storage.Load<half4>(200+0), input_storage.Load<half4>(200+8), input_storage.Load<half4>(200+16), input_storage.Load<half4>(200+24));
{
half4x4 _value2 = (_e139 + _e142);
output.Store(200+0, _value2[0]);
output.Store(200+8, _value2[1]);
output.Store(200+16, _value2[2]);
output.Store(200+24, _value2[3]);
}
half _e148[2] = Constructarray2_half_(input_arrays.Load<half>(0+0), input_arrays.Load<half>(0+2));
{
half _value2[2] = _e148;
output_arrays.Store(0+0, _value2[0]);
output_arrays.Store(0+2, _value2[1]);
}
half _e149 = val;
half _e151 = val;
val = (_e151 + abs(_e149));
half _e153 = val;
half _e154 = val;
half _e155 = val;
half _e157 = val;
val = (_e157 + clamp(_e153, _e154, _e155));
half _e159 = val;
half _e161 = val;
half _e164 = val;
val = (_e164 + dot((_e159).xx, (_e161).xx));
half _e166 = val;
half _e167 = val;
half _e169 = val;
val = (_e169 + max(_e166, _e167));
half _e171 = val;
half _e172 = val;
half _e174 = val;
val = (_e174 + min(_e171, _e172));
half _e176 = val;
half _e178 = val;
val = (_e178 + sign(_e176));
half _e181 = val;
val = (_e181 + 1.0h);
half2 _e185 = input_uniform.val_f16_2_;
float2 float_vec2_ = float2(_e185);
output.Store(16, half2(float_vec2_));
half3 _e192 = input_uniform.val_f16_3_;
float3 float_vec3_ = float3(_e192);
output.Store(24, half3(float_vec3_));
half4 _e199 = input_uniform.val_f16_4_;
float4 float_vec4_ = float4(_e199);
output.Store(32, half4(float_vec4_));
half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
{
half2x2 _value2 = half2x2(float2x2(_e208));
output.Store(44+0, _value2[0]);
output.Store(44+4, _value2[1]);
}
half2x3 _e215 = input_uniform.val_mat2x3_;
{
half2x3 _value2 = half2x3(float2x3(_e215));
output.Store(56+0, _value2[0]);
output.Store(56+8, _value2[1]);
}
half2x4 _e222 = input_uniform.val_mat2x4_;
{
half2x4 _value2 = half2x4(float2x4(_e222));
output.Store(72+0, _value2[0]);
output.Store(72+8, _value2[1]);
}
half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
{
half3x2 _value2 = half3x2(float3x2(_e229));
output.Store(88+0, _value2[0]);
output.Store(88+4, _value2[1]);
output.Store(88+8, _value2[2]);
}
half3x3 _e236 = input_uniform.val_mat3x3_;
{
half3x3 _value2 = half3x3(float3x3(_e236));
output.Store(104+0, _value2[0]);
output.Store(104+8, _value2[1]);
output.Store(104+16, _value2[2]);
}
half3x4 _e243 = input_uniform.val_mat3x4_;
{
half3x4 _value2 = half3x4(float3x4(_e243));
output.Store(128+0, _value2[0]);
output.Store(128+8, _value2[1]);
output.Store(128+16, _value2[2]);
}
half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
{
half4x2 _value2 = half4x2(float4x2(_e250));
output.Store(152+0, _value2[0]);
output.Store(152+4, _value2[1]);
output.Store(152+8, _value2[2]);
output.Store(152+12, _value2[3]);
}
half4x3 _e257 = input_uniform.val_mat4x3_;
{
half4x3 _value2 = half4x3(float4x3(_e257));
output.Store(168+0, _value2[0]);
output.Store(168+8, _value2[1]);
output.Store(168+16, _value2[2]);
output.Store(168+24, _value2[3]);
}
half4x4 _e264 = input_uniform.val_mat4x4_;
{
half4x4 _value2 = half4x4(float4x4(_e264));
output.Store(200+0, _value2[0]);
output.Store(200+8, _value2[1]);
output.Store(200+16, _value2[2]);
output.Store(200+24, _value2[3]);
}
half _e267 = val;
return _e267;
}
[numthreads(1, 1, 1)]
void main()
{
const half _e3 = f16_function(2.0h);
output.Store(40, _e3);
return;
}

View File

@@ -0,0 +1,12 @@
(
vertex:[
],
fragment:[
],
compute:[
(
entry_point:"main",
target_profile:"cs_6_2",
),
],
)

View File

@@ -6,7 +6,7 @@ using metal::uint;
struct PushConstants {
uint index;
char _pad1[12];
char _pad1[4];
metal::float2 double_;
};
struct FragmentIn {

177
naga/tests/out/msl/f16.msl Normal file
View File

@@ -0,0 +1,177 @@
// language: metal1.0
#include <metal_stdlib>
#include <simd/simd.h>
using metal::uint;
struct UniformCompatible {
uint val_u32_;
int val_i32_;
float val_f32_;
half val_f16_;
char _pad4[2];
metal::half2 val_f16_2_;
char _pad5[4];
metal::half3 val_f16_3_;
metal::half4 val_f16_4_;
half final_value;
char _pad8[2];
metal::half2x2 val_mat2x2_;
char _pad9[4];
metal::half2x3 val_mat2x3_;
metal::half2x4 val_mat2x4_;
metal::half3x2 val_mat3x2_;
char _pad12[4];
metal::half3x3 val_mat3x3_;
metal::half3x4 val_mat3x4_;
metal::half4x2 val_mat4x2_;
metal::half4x3 val_mat4x3_;
metal::half4x4 val_mat4x4_;
};
struct type_16 {
half inner[2];
};
struct StorageCompatible {
type_16 val_f16_array_2_;
};
struct LayoutTest {
half scalar1_;
half scalar2_;
char _pad2[4];
metal::packed_half3 v3_;
half tuck_in;
half scalar4_;
char _pad5[2];
uint larger;
};
constant half constant_variable = 15.203125h;
half f16_function(
half x,
constant UniformCompatible& input_uniform,
device UniformCompatible const& input_storage,
device StorageCompatible const& input_arrays,
device UniformCompatible& output,
device StorageCompatible& output_arrays
) {
half val = 15.203125h;
half _e4 = val;
val = _e4 + -33344.0h;
half _e6 = val;
half _e9 = val;
val = _e9 + (_e6 + 5.0h);
float _e13 = input_uniform.val_f32_;
half _e14 = val;
half _e18 = val;
val = _e18 + static_cast<half>(_e13 + static_cast<float>(_e14));
half _e22 = input_uniform.val_f16_;
half _e25 = val;
val = _e25 + metal::half3(_e22).z;
half _e31 = input_uniform.val_f16_;
half _e34 = input_storage.val_f16_;
output.val_f16_ = _e31 + _e34;
metal::half2 _e40 = input_uniform.val_f16_2_;
metal::half2 _e43 = input_storage.val_f16_2_;
output.val_f16_2_ = _e40 + _e43;
metal::half3 _e49 = input_uniform.val_f16_3_;
metal::half3 _e52 = input_storage.val_f16_3_;
output.val_f16_3_ = _e49 + _e52;
metal::half4 _e58 = input_uniform.val_f16_4_;
metal::half4 _e61 = input_storage.val_f16_4_;
output.val_f16_4_ = _e58 + _e61;
metal::half2x2 _e67 = input_uniform.val_mat2x2_;
metal::half2x2 _e70 = input_storage.val_mat2x2_;
output.val_mat2x2_ = _e67 + _e70;
metal::half2x3 _e76 = input_uniform.val_mat2x3_;
metal::half2x3 _e79 = input_storage.val_mat2x3_;
output.val_mat2x3_ = _e76 + _e79;
metal::half2x4 _e85 = input_uniform.val_mat2x4_;
metal::half2x4 _e88 = input_storage.val_mat2x4_;
output.val_mat2x4_ = _e85 + _e88;
metal::half3x2 _e94 = input_uniform.val_mat3x2_;
metal::half3x2 _e97 = input_storage.val_mat3x2_;
output.val_mat3x2_ = _e94 + _e97;
metal::half3x3 _e103 = input_uniform.val_mat3x3_;
metal::half3x3 _e106 = input_storage.val_mat3x3_;
output.val_mat3x3_ = _e103 + _e106;
metal::half3x4 _e112 = input_uniform.val_mat3x4_;
metal::half3x4 _e115 = input_storage.val_mat3x4_;
output.val_mat3x4_ = _e112 + _e115;
metal::half4x2 _e121 = input_uniform.val_mat4x2_;
metal::half4x2 _e124 = input_storage.val_mat4x2_;
output.val_mat4x2_ = _e121 + _e124;
metal::half4x3 _e130 = input_uniform.val_mat4x3_;
metal::half4x3 _e133 = input_storage.val_mat4x3_;
output.val_mat4x3_ = _e130 + _e133;
metal::half4x4 _e139 = input_uniform.val_mat4x4_;
metal::half4x4 _e142 = input_storage.val_mat4x4_;
output.val_mat4x4_ = _e139 + _e142;
type_16 _e148 = input_arrays.val_f16_array_2_;
output_arrays.val_f16_array_2_ = _e148;
half _e149 = val;
half _e151 = val;
val = _e151 + metal::abs(_e149);
half _e153 = val;
half _e154 = val;
half _e155 = val;
half _e157 = val;
val = _e157 + metal::clamp(_e153, _e154, _e155);
half _e159 = val;
half _e161 = val;
half _e164 = val;
val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161));
half _e166 = val;
half _e167 = val;
half _e169 = val;
val = _e169 + metal::max(_e166, _e167);
half _e171 = val;
half _e172 = val;
half _e174 = val;
val = _e174 + metal::min(_e171, _e172);
half _e176 = val;
half _e178 = val;
val = _e178 + metal::sign(_e176);
half _e181 = val;
val = _e181 + 1.0h;
metal::half2 _e185 = input_uniform.val_f16_2_;
metal::float2 float_vec2_ = static_cast<metal::float2>(_e185);
output.val_f16_2_ = static_cast<metal::half2>(float_vec2_);
metal::half3 _e192 = input_uniform.val_f16_3_;
metal::float3 float_vec3_ = static_cast<metal::float3>(_e192);
output.val_f16_3_ = static_cast<metal::half3>(float_vec3_);
metal::half4 _e199 = input_uniform.val_f16_4_;
metal::float4 float_vec4_ = static_cast<metal::float4>(_e199);
output.val_f16_4_ = static_cast<metal::half4>(float_vec4_);
metal::half2x2 _e208 = input_uniform.val_mat2x2_;
output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208));
metal::half2x3 _e215 = input_uniform.val_mat2x3_;
output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215));
metal::half2x4 _e222 = input_uniform.val_mat2x4_;
output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222));
metal::half3x2 _e229 = input_uniform.val_mat3x2_;
output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229));
metal::half3x3 _e236 = input_uniform.val_mat3x3_;
output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236));
metal::half3x4 _e243 = input_uniform.val_mat3x4_;
output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243));
metal::half4x2 _e250 = input_uniform.val_mat4x2_;
output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250));
metal::half4x3 _e257 = input_uniform.val_mat4x3_;
output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257));
metal::half4x4 _e264 = input_uniform.val_mat4x4_;
output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264));
half _e267 = val;
return _e267;
}
kernel void main_(
constant UniformCompatible& input_uniform [[user(fake0)]]
, device UniformCompatible const& input_storage [[user(fake0)]]
, device StorageCompatible const& input_arrays [[user(fake0)]]
, device UniformCompatible& output [[user(fake0)]]
, device StorageCompatible& output_arrays [[user(fake0)]]
) {
half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays);
output.final_value = _e3;
return;
}

View File

@@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.0
; Generator: rspirv
; Bound: 175
; Bound: 173
OpCapability Shader
OpCapability Int64Atomics
OpCapability Int64
@@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64"
OpEntryPoint GLCompute %105 "test_atomic_compare_exchange_u64"
OpEntryPoint GLCompute %104 "test_atomic_compare_exchange_u64"
OpExecutionMode %19 LocalSize 1 1 1
OpExecutionMode %105 LocalSize 1 1 1
OpExecutionMode %104 LocalSize 1 1 1
OpDecorate %5 ArrayStride 8
OpDecorate %8 ArrayStride 8
OpMemberDecorate %10 0 Offset 0
@@ -63,12 +63,12 @@ OpMemberDecorate %16 0 Offset 0
%68 = OpTypeInt 32 1
%67 = OpConstant %68 1
%69 = OpConstant %3 64
%106 = OpTypePointer StorageBuffer %8
%108 = OpConstant %7 10
%111 = OpTypePointer Function %7
%112 = OpConstantNull %7
%114 = OpConstantNull %9
%138 = OpTypePointer StorageBuffer %7
%105 = OpTypePointer StorageBuffer %8
%107 = OpConstant %7 10
%110 = OpTypePointer Function %7
%111 = OpConstantNull %7
%113 = OpConstantNull %9
%137 = OpTypePointer StorageBuffer %7
%19 = OpFunction %2 None %20
%18 = OpLabel
%27 = OpVariable %28 Function %22
@@ -142,16 +142,16 @@ OpBranch %89
%89 = OpLabel
%91 = OpLoad %4 %29
%92 = OpIAdd %4 %91 %25
%94 = OpLoad %3 %27
%95 = OpLoad %4 %29
%97 = OpAccessChain %64 %23 %94
%98 = OpAtomicCompareExchange %4 %97 %67 %69 %69 %92 %95
%99 = OpIEqual %9 %98 %95
%96 = OpCompositeConstruct %10 %98 %99
%100 = OpCompositeExtract %4 %96 0
OpStore %29 %100
%101 = OpCompositeExtract %9 %96 1
OpStore %32 %101
%93 = OpLoad %3 %27
%94 = OpLoad %4 %29
%96 = OpAccessChain %64 %23 %93
%97 = OpAtomicCompareExchange %4 %96 %67 %69 %69 %92 %94
%98 = OpIEqual %9 %97 %94
%95 = OpCompositeConstruct %10 %97 %98
%99 = OpCompositeExtract %4 %95 0
OpStore %29 %99
%100 = OpCompositeExtract %9 %95 1
OpStore %32 %100
OpBranch %90
%90 = OpLabel
OpBranch %73
@@ -162,110 +162,110 @@ OpBranch %62
%62 = OpLabel
OpBranch %39
%39 = OpLabel
%102 = OpLoad %3 %27
%103 = OpIAdd %3 %102 %26
OpStore %27 %103
%101 = OpLoad %3 %27
%102 = OpIAdd %3 %101 %26
OpStore %27 %102
OpBranch %36
%37 = OpLabel
OpReturn
OpFunctionEnd
%105 = OpFunction %2 None %20
%104 = OpLabel
%109 = OpVariable %28 Function %22
%110 = OpVariable %111 Function %112
%113 = OpVariable %33 Function %114
%120 = OpVariable %41 Function %43
%145 = OpVariable %41 Function %43
%107 = OpAccessChain %106 %15 %22
%104 = OpFunction %2 None %20
%103 = OpLabel
%108 = OpVariable %28 Function %22
%109 = OpVariable %110 Function %111
%112 = OpVariable %33 Function %113
%119 = OpVariable %41 Function %43
%144 = OpVariable %41 Function %43
%106 = OpAccessChain %105 %15 %22
OpBranch %114
%114 = OpLabel
OpBranch %115
%115 = OpLabel
OpBranch %116
%116 = OpLabel
OpLoopMerge %117 %119 None
OpBranch %121
%121 = OpLabel
%122 = OpLoad %40 %120
%123 = OpIEqual %42 %45 %122
%124 = OpAll %9 %123
OpSelectionMerge %125 None
OpBranchConditional %124 %117 %125
%125 = OpLabel
%126 = OpCompositeExtract %3 %122 1
%127 = OpIEqual %9 %126 %44
%128 = OpSelect %3 %127 %26 %22
%129 = OpCompositeConstruct %40 %128 %26
%130 = OpIAdd %40 %122 %129
OpStore %120 %130
OpBranch %118
%118 = OpLabel
%131 = OpLoad %3 %109
%132 = OpULessThan %9 %131 %6
OpSelectionMerge %133 None
OpBranchConditional %132 %133 %134
%134 = OpLabel
OpLoopMerge %116 %118 None
OpBranch %120
%120 = OpLabel
%121 = OpLoad %40 %119
%122 = OpIEqual %42 %45 %121
%123 = OpAll %9 %122
OpSelectionMerge %124 None
OpBranchConditional %123 %116 %124
%124 = OpLabel
%125 = OpCompositeExtract %3 %121 1
%126 = OpIEqual %9 %125 %44
%127 = OpSelect %3 %126 %26 %22
%128 = OpCompositeConstruct %40 %127 %26
%129 = OpIAdd %40 %121 %128
OpStore %119 %129
OpBranch %117
%117 = OpLabel
%130 = OpLoad %3 %108
%131 = OpULessThan %9 %130 %6
OpSelectionMerge %132 None
OpBranchConditional %131 %132 %133
%133 = OpLabel
OpBranch %135
%135 = OpLabel
%137 = OpLoad %3 %109
%139 = OpAccessChain %138 %107 %137
%140 = OpAtomicLoad %7 %139 %67 %69
OpStore %110 %140
OpStore %113 %24
OpBranch %141
%141 = OpLabel
OpLoopMerge %142 %144 None
OpBranch %146
%146 = OpLabel
%147 = OpLoad %40 %145
%148 = OpIEqual %42 %45 %147
%149 = OpAll %9 %148
OpSelectionMerge %150 None
OpBranchConditional %149 %142 %150
%150 = OpLabel
%151 = OpCompositeExtract %3 %147 1
%152 = OpIEqual %9 %151 %44
%153 = OpSelect %3 %152 %26 %22
%154 = OpCompositeConstruct %40 %153 %26
%155 = OpIAdd %40 %147 %154
OpStore %145 %155
OpBranch %143
%143 = OpLabel
%156 = OpLoad %9 %113
%157 = OpLogicalNot %9 %156
OpSelectionMerge %158 None
OpBranchConditional %157 %158 %159
%159 = OpLabel
OpBranch %116
%132 = OpLabel
OpBranch %134
%134 = OpLabel
%136 = OpLoad %3 %108
%138 = OpAccessChain %137 %106 %136
%139 = OpAtomicLoad %7 %138 %67 %69
OpStore %109 %139
OpStore %112 %24
OpBranch %140
%140 = OpLabel
OpLoopMerge %141 %143 None
OpBranch %145
%145 = OpLabel
%146 = OpLoad %40 %144
%147 = OpIEqual %42 %45 %146
%148 = OpAll %9 %147
OpSelectionMerge %149 None
OpBranchConditional %148 %141 %149
%149 = OpLabel
%150 = OpCompositeExtract %3 %146 1
%151 = OpIEqual %9 %150 %44
%152 = OpSelect %3 %151 %26 %22
%153 = OpCompositeConstruct %40 %152 %26
%154 = OpIAdd %40 %146 %153
OpStore %144 %154
OpBranch %142
%142 = OpLabel
%155 = OpLoad %9 %112
%156 = OpLogicalNot %9 %155
OpSelectionMerge %157 None
OpBranchConditional %156 %157 %158
%158 = OpLabel
OpBranch %141
%157 = OpLabel
OpBranch %159
%159 = OpLabel
%161 = OpLoad %7 %109
%162 = OpIAdd %7 %161 %107
%163 = OpLoad %3 %108
%164 = OpLoad %7 %109
%166 = OpAccessChain %137 %106 %163
%167 = OpAtomicCompareExchange %7 %166 %67 %69 %69 %162 %164
%168 = OpIEqual %9 %167 %164
%165 = OpCompositeConstruct %11 %167 %168
%169 = OpCompositeExtract %7 %165 0
OpStore %109 %169
%170 = OpCompositeExtract %9 %165 1
OpStore %112 %170
OpBranch %160
%160 = OpLabel
%162 = OpLoad %7 %110
%163 = OpIAdd %7 %162 %108
%165 = OpLoad %3 %109
%166 = OpLoad %7 %110
%168 = OpAccessChain %138 %107 %165
%169 = OpAtomicCompareExchange %7 %168 %67 %69 %69 %163 %166
%170 = OpIEqual %9 %169 %166
%167 = OpCompositeConstruct %11 %169 %170
%171 = OpCompositeExtract %7 %167 0
OpStore %110 %171
%172 = OpCompositeExtract %9 %167 1
OpStore %113 %172
OpBranch %161
%161 = OpLabel
OpBranch %144
%144 = OpLabel
OpBranch %141
%142 = OpLabel
OpBranch %136
%136 = OpLabel
OpBranch %119
%119 = OpLabel
%173 = OpLoad %3 %109
%174 = OpIAdd %3 %173 %26
OpStore %109 %174
OpBranch %116
%117 = OpLabel
OpBranch %143
%143 = OpLabel
OpBranch %140
%141 = OpLabel
OpBranch %135
%135 = OpLabel
OpBranch %118
%118 = OpLabel
%171 = OpLoad %3 %108
%172 = OpIAdd %3 %171 %26
OpStore %108 %172
OpBranch %115
%116 = OpLabel
OpReturn
OpFunctionEnd

View File

@@ -1,76 +1,74 @@
; SPIR-V
; Version: 1.2
; Generator: rspirv
; Bound: 48
; Bound: 47
OpCapability Shader
OpCapability Float64
OpCapability Geometry
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %23 "main" %15 %18 %21
OpExecutionMode %23 OriginUpperLeft
OpEntryPoint Fragment %22 "main" %14 %17 %20
OpExecutionMode %22 OriginUpperLeft
OpMemberDecorate %6 0 Offset 0
OpMemberDecorate %6 1 Offset 16
OpMemberDecorate %9 0 Offset 0
OpMemberDecorate %9 1 Offset 16
OpDecorate %11 Block
OpMemberDecorate %11 0 Offset 0
OpDecorate %15 Location 0
OpDecorate %18 BuiltIn PrimitiveId
OpDecorate %18 Flat
OpDecorate %21 Location 0
OpMemberDecorate %6 1 Offset 8
OpMemberDecorate %8 0 Offset 0
OpMemberDecorate %8 1 Offset 16
OpDecorate %10 Block
OpMemberDecorate %10 0 Offset 0
OpDecorate %14 Location 0
OpDecorate %17 BuiltIn PrimitiveId
OpDecorate %17 Flat
OpDecorate %20 Location 0
%2 = OpTypeVoid
%3 = OpTypeInt 32 0
%5 = OpTypeFloat 64
%5 = OpTypeFloat 32
%4 = OpTypeVector %5 2
%6 = OpTypeStruct %3 %4
%8 = OpTypeFloat 32
%7 = OpTypeVector %8 4
%9 = OpTypeStruct %7 %3
%11 = OpTypeStruct %6
%12 = OpTypePointer PushConstant %11
%10 = OpVariable %12 PushConstant
%16 = OpTypePointer Input %7
%15 = OpVariable %16 Input
%19 = OpTypePointer Input %3
%18 = OpVariable %19 Input
%22 = OpTypePointer Output %7
%21 = OpVariable %22 Output
%24 = OpTypeFunction %2
%25 = OpTypePointer PushConstant %6
%26 = OpConstant %3 0
%28 = OpConstant %8 1.0
%29 = OpTypeVector %8 3
%30 = OpConstantComposite %29 %28 %28 %28
%33 = OpTypePointer PushConstant %3
%36 = OpTypeBool
%23 = OpFunction %2 None %24
%13 = OpLabel
%17 = OpLoad %7 %15
%20 = OpLoad %3 %18
%14 = OpCompositeConstruct %9 %17 %20
%27 = OpAccessChain %25 %10 %26
OpBranch %31
%31 = OpLabel
%32 = OpCompositeExtract %3 %14 1
%34 = OpAccessChain %33 %27 %26
%35 = OpLoad %3 %34
%37 = OpIEqual %36 %32 %35
OpSelectionMerge %38 None
OpBranchConditional %37 %39 %40
%39 = OpLabel
%41 = OpCompositeExtract %7 %14 0
OpStore %21 %41
OpReturn
%40 = OpLabel
%42 = OpCompositeExtract %7 %14 0
%43 = OpVectorShuffle %29 %42 %42 0 1 2
%44 = OpFSub %29 %30 %43
%45 = OpCompositeExtract %7 %14 0
%46 = OpCompositeExtract %8 %45 3
%47 = OpCompositeConstruct %7 %44 %46
OpStore %21 %47
OpReturn
%7 = OpTypeVector %5 4
%8 = OpTypeStruct %7 %3
%10 = OpTypeStruct %6
%11 = OpTypePointer PushConstant %10
%9 = OpVariable %11 PushConstant
%15 = OpTypePointer Input %7
%14 = OpVariable %15 Input
%18 = OpTypePointer Input %3
%17 = OpVariable %18 Input
%21 = OpTypePointer Output %7
%20 = OpVariable %21 Output
%23 = OpTypeFunction %2
%24 = OpTypePointer PushConstant %6
%25 = OpConstant %3 0
%27 = OpConstant %5 1.0
%28 = OpTypeVector %5 3
%29 = OpConstantComposite %28 %27 %27 %27
%32 = OpTypePointer PushConstant %3
%35 = OpTypeBool
%22 = OpFunction %2 None %23
%12 = OpLabel
%16 = OpLoad %7 %14
%19 = OpLoad %3 %17
%13 = OpCompositeConstruct %8 %16 %19
%26 = OpAccessChain %24 %9 %25
OpBranch %30
%30 = OpLabel
%31 = OpCompositeExtract %3 %13 1
%33 = OpAccessChain %32 %26 %25
%34 = OpLoad %3 %33
%36 = OpIEqual %35 %31 %34
OpSelectionMerge %37 None
OpBranchConditional %36 %38 %39
%38 = OpLabel
%40 = OpCompositeExtract %7 %13 0
OpStore %20 %40
OpReturn
%39 = OpLabel
%41 = OpCompositeExtract %7 %13 0
%42 = OpVectorShuffle %28 %41 %41 0 1 2
%43 = OpFSub %28 %29 %42
%44 = OpCompositeExtract %7 %13 0
%45 = OpCompositeExtract %5 %44 3
%46 = OpCompositeConstruct %7 %43 %45
OpStore %20 %46
OpReturn
%37 = OpLabel
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,633 @@
; SPIR-V
; Version: 1.0
; Generator: rspirv
; Bound: 511
OpCapability Shader
OpCapability Float16
OpCapability StorageBuffer16BitAccess
OpCapability UniformAndStorageBuffer16BitAccess
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %499 "main"
OpExecutionMode %499 LocalSize 1 1 1
OpMemberDecorate %19 0 Offset 0
OpMemberDecorate %19 1 Offset 4
OpMemberDecorate %19 2 Offset 8
OpMemberDecorate %19 3 Offset 12
OpMemberDecorate %19 4 Offset 16
OpMemberDecorate %19 5 Offset 24
OpMemberDecorate %19 6 Offset 32
OpMemberDecorate %19 7 Offset 40
OpMemberDecorate %19 8 Offset 44
OpMemberDecorate %19 8 ColMajor
OpMemberDecorate %19 8 MatrixStride 4
OpMemberDecorate %19 9 Offset 56
OpMemberDecorate %19 9 ColMajor
OpMemberDecorate %19 9 MatrixStride 8
OpMemberDecorate %19 10 Offset 72
OpMemberDecorate %19 10 ColMajor
OpMemberDecorate %19 10 MatrixStride 8
OpMemberDecorate %19 11 Offset 88
OpMemberDecorate %19 11 ColMajor
OpMemberDecorate %19 11 MatrixStride 4
OpMemberDecorate %19 12 Offset 104
OpMemberDecorate %19 12 ColMajor
OpMemberDecorate %19 12 MatrixStride 8
OpMemberDecorate %19 13 Offset 128
OpMemberDecorate %19 13 ColMajor
OpMemberDecorate %19 13 MatrixStride 8
OpMemberDecorate %19 14 Offset 152
OpMemberDecorate %19 14 ColMajor
OpMemberDecorate %19 14 MatrixStride 4
OpMemberDecorate %19 15 Offset 168
OpMemberDecorate %19 15 ColMajor
OpMemberDecorate %19 15 MatrixStride 8
OpMemberDecorate %19 16 Offset 200
OpMemberDecorate %19 16 ColMajor
OpMemberDecorate %19 16 MatrixStride 8
OpDecorate %20 ArrayStride 2
OpMemberDecorate %22 0 Offset 0
OpMemberDecorate %23 0 Offset 0
OpMemberDecorate %23 1 Offset 2
OpMemberDecorate %23 2 Offset 8
OpMemberDecorate %23 3 Offset 14
OpMemberDecorate %23 4 Offset 16
OpMemberDecorate %23 5 Offset 20
OpDecorate %28 DescriptorSet 0
OpDecorate %28 Binding 0
OpDecorate %29 Block
OpMemberDecorate %29 0 Offset 0
OpDecorate %31 NonWritable
OpDecorate %31 DescriptorSet 0
OpDecorate %31 Binding 1
OpDecorate %32 Block
OpMemberDecorate %32 0 Offset 0
OpDecorate %34 NonWritable
OpDecorate %34 DescriptorSet 0
OpDecorate %34 Binding 2
OpDecorate %35 Block
OpMemberDecorate %35 0 Offset 0
OpDecorate %37 DescriptorSet 0
OpDecorate %37 Binding 3
OpDecorate %38 Block
OpMemberDecorate %38 0 Offset 0
OpDecorate %40 DescriptorSet 0
OpDecorate %40 Binding 4
OpDecorate %41 Block
OpMemberDecorate %41 0 Offset 0
%2 = OpTypeVoid
%3 = OpTypeFloat 16
%4 = OpTypeInt 32 0
%5 = OpTypeInt 32 1
%6 = OpTypeFloat 32
%7 = OpTypeVector %3 2
%8 = OpTypeVector %3 3
%9 = OpTypeVector %3 4
%10 = OpTypeMatrix %7 2
%11 = OpTypeMatrix %8 2
%12 = OpTypeMatrix %9 2
%13 = OpTypeMatrix %7 3
%14 = OpTypeMatrix %8 3
%15 = OpTypeMatrix %9 3
%16 = OpTypeMatrix %7 4
%17 = OpTypeMatrix %8 4
%18 = OpTypeMatrix %9 4
%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18
%21 = OpConstant %4 2
%20 = OpTypeArray %3 %21
%22 = OpTypeStruct %20
%23 = OpTypeStruct %3 %3 %8 %3 %3 %4
%24 = OpConstant %3 2.1524e-41
%25 = OpConstant %3 2.7121e-41
%27 = OpTypePointer Private %3
%26 = OpVariable %27 Private %24
%29 = OpTypeStruct %19
%30 = OpTypePointer Uniform %29
%28 = OpVariable %30 Uniform
%32 = OpTypeStruct %19
%33 = OpTypePointer StorageBuffer %32
%31 = OpVariable %33 StorageBuffer
%35 = OpTypeStruct %22
%36 = OpTypePointer StorageBuffer %35
%34 = OpVariable %36 StorageBuffer
%38 = OpTypeStruct %19
%39 = OpTypePointer StorageBuffer %38
%37 = OpVariable %39 StorageBuffer
%41 = OpTypeStruct %22
%42 = OpTypePointer StorageBuffer %41
%40 = OpVariable %42 StorageBuffer
%46 = OpTypeFunction %3 %3
%47 = OpTypePointer Uniform %19
%48 = OpConstant %4 0
%50 = OpTypePointer StorageBuffer %19
%52 = OpTypePointer StorageBuffer %22
%56 = OpConstant %3 8.8991e-41
%57 = OpConstant %3 2.4753e-41
%59 = OpTypePointer Function %3
%67 = OpTypePointer Uniform %6
%76 = OpTypePointer Uniform %3
%77 = OpConstant %4 3
%84 = OpTypePointer StorageBuffer %3
%91 = OpTypePointer StorageBuffer %7
%92 = OpTypePointer Uniform %7
%93 = OpConstant %4 4
%100 = OpTypePointer StorageBuffer %8
%101 = OpTypePointer Uniform %8
%102 = OpConstant %4 5
%109 = OpTypePointer StorageBuffer %9
%110 = OpTypePointer Uniform %9
%111 = OpConstant %4 6
%118 = OpTypePointer StorageBuffer %10
%119 = OpTypePointer Uniform %10
%120 = OpConstant %4 8
%133 = OpTypePointer StorageBuffer %11
%134 = OpTypePointer Uniform %11
%135 = OpConstant %4 9
%148 = OpTypePointer StorageBuffer %12
%149 = OpTypePointer Uniform %12
%150 = OpConstant %4 10
%163 = OpTypePointer StorageBuffer %13
%164 = OpTypePointer Uniform %13
%165 = OpConstant %4 11
%181 = OpTypePointer StorageBuffer %14
%182 = OpTypePointer Uniform %14
%183 = OpConstant %4 12
%199 = OpTypePointer StorageBuffer %15
%200 = OpTypePointer Uniform %15
%201 = OpConstant %4 13
%217 = OpTypePointer StorageBuffer %16
%218 = OpTypePointer Uniform %16
%219 = OpConstant %4 14
%238 = OpTypePointer StorageBuffer %17
%239 = OpTypePointer Uniform %17
%240 = OpConstant %4 15
%259 = OpTypePointer StorageBuffer %18
%260 = OpTypePointer Uniform %18
%261 = OpConstant %4 16
%280 = OpTypePointer StorageBuffer %20
%319 = OpTypeVector %6 2
%325 = OpTypeVector %6 3
%331 = OpTypeVector %6 4
%337 = OpTypeMatrix %319 2
%351 = OpTypeMatrix %325 2
%365 = OpTypeMatrix %331 2
%379 = OpTypeMatrix %319 3
%397 = OpTypeMatrix %325 3
%415 = OpTypeMatrix %331 3
%433 = OpTypeMatrix %319 4
%455 = OpTypeMatrix %325 4
%477 = OpTypeMatrix %331 4
%500 = OpTypeFunction %2
%506 = OpConstant %3 2.2959e-41
%509 = OpConstant %4 7
%45 = OpFunction %3 None %46
%44 = OpFunctionParameter %3
%43 = OpLabel
%58 = OpVariable %59 Function %25
%49 = OpAccessChain %47 %28 %48
%51 = OpAccessChain %50 %31 %48
%53 = OpAccessChain %52 %34 %48
%54 = OpAccessChain %50 %37 %48
%55 = OpAccessChain %52 %40 %48
OpBranch %60
%60 = OpLabel
%61 = OpLoad %3 %58
%62 = OpFAdd %3 %61 %56
OpStore %58 %62
%63 = OpLoad %3 %58
%64 = OpFAdd %3 %63 %57
%65 = OpLoad %3 %58
%66 = OpFAdd %3 %65 %64
OpStore %58 %66
%68 = OpAccessChain %67 %49 %21
%69 = OpLoad %6 %68
%70 = OpLoad %3 %58
%71 = OpFConvert %6 %70
%72 = OpFAdd %6 %69 %71
%73 = OpFConvert %3 %72
%74 = OpLoad %3 %58
%75 = OpFAdd %3 %74 %73
OpStore %58 %75
%78 = OpAccessChain %76 %49 %77
%79 = OpLoad %3 %78
%80 = OpCompositeConstruct %8 %79 %79 %79
%81 = OpCompositeExtract %3 %80 2
%82 = OpLoad %3 %58
%83 = OpFAdd %3 %82 %81
OpStore %58 %83
%85 = OpAccessChain %76 %49 %77
%86 = OpLoad %3 %85
%87 = OpAccessChain %84 %51 %77
%88 = OpLoad %3 %87
%89 = OpFAdd %3 %86 %88
%90 = OpAccessChain %84 %54 %77
OpStore %90 %89
%94 = OpAccessChain %92 %49 %93
%95 = OpLoad %7 %94
%96 = OpAccessChain %91 %51 %93
%97 = OpLoad %7 %96
%98 = OpFAdd %7 %95 %97
%99 = OpAccessChain %91 %54 %93
OpStore %99 %98
%103 = OpAccessChain %101 %49 %102
%104 = OpLoad %8 %103
%105 = OpAccessChain %100 %51 %102
%106 = OpLoad %8 %105
%107 = OpFAdd %8 %104 %106
%108 = OpAccessChain %100 %54 %102
OpStore %108 %107
%112 = OpAccessChain %110 %49 %111
%113 = OpLoad %9 %112
%114 = OpAccessChain %109 %51 %111
%115 = OpLoad %9 %114
%116 = OpFAdd %9 %113 %115
%117 = OpAccessChain %109 %54 %111
OpStore %117 %116
%121 = OpAccessChain %119 %49 %120
%122 = OpLoad %10 %121
%123 = OpAccessChain %118 %51 %120
%124 = OpLoad %10 %123
%126 = OpCompositeExtract %7 %122 0
%127 = OpCompositeExtract %7 %124 0
%128 = OpFAdd %7 %126 %127
%129 = OpCompositeExtract %7 %122 1
%130 = OpCompositeExtract %7 %124 1
%131 = OpFAdd %7 %129 %130
%125 = OpCompositeConstruct %10 %128 %131
%132 = OpAccessChain %118 %54 %120
OpStore %132 %125
%136 = OpAccessChain %134 %49 %135
%137 = OpLoad %11 %136
%138 = OpAccessChain %133 %51 %135
%139 = OpLoad %11 %138
%141 = OpCompositeExtract %8 %137 0
%142 = OpCompositeExtract %8 %139 0
%143 = OpFAdd %8 %141 %142
%144 = OpCompositeExtract %8 %137 1
%145 = OpCompositeExtract %8 %139 1
%146 = OpFAdd %8 %144 %145
%140 = OpCompositeConstruct %11 %143 %146
%147 = OpAccessChain %133 %54 %135
OpStore %147 %140
%151 = OpAccessChain %149 %49 %150
%152 = OpLoad %12 %151
%153 = OpAccessChain %148 %51 %150
%154 = OpLoad %12 %153
%156 = OpCompositeExtract %9 %152 0
%157 = OpCompositeExtract %9 %154 0
%158 = OpFAdd %9 %156 %157
%159 = OpCompositeExtract %9 %152 1
%160 = OpCompositeExtract %9 %154 1
%161 = OpFAdd %9 %159 %160
%155 = OpCompositeConstruct %12 %158 %161
%162 = OpAccessChain %148 %54 %150
OpStore %162 %155
%166 = OpAccessChain %164 %49 %165
%167 = OpLoad %13 %166
%168 = OpAccessChain %163 %51 %165
%169 = OpLoad %13 %168
%171 = OpCompositeExtract %7 %167 0
%172 = OpCompositeExtract %7 %169 0
%173 = OpFAdd %7 %171 %172
%174 = OpCompositeExtract %7 %167 1
%175 = OpCompositeExtract %7 %169 1
%176 = OpFAdd %7 %174 %175
%177 = OpCompositeExtract %7 %167 2
%178 = OpCompositeExtract %7 %169 2
%179 = OpFAdd %7 %177 %178
%170 = OpCompositeConstruct %13 %173 %176 %179
%180 = OpAccessChain %163 %54 %165
OpStore %180 %170
%184 = OpAccessChain %182 %49 %183
%185 = OpLoad %14 %184
%186 = OpAccessChain %181 %51 %183
%187 = OpLoad %14 %186
%189 = OpCompositeExtract %8 %185 0
%190 = OpCompositeExtract %8 %187 0
%191 = OpFAdd %8 %189 %190
%192 = OpCompositeExtract %8 %185 1
%193 = OpCompositeExtract %8 %187 1
%194 = OpFAdd %8 %192 %193
%195 = OpCompositeExtract %8 %185 2
%196 = OpCompositeExtract %8 %187 2
%197 = OpFAdd %8 %195 %196
%188 = OpCompositeConstruct %14 %191 %194 %197
%198 = OpAccessChain %181 %54 %183
OpStore %198 %188
%202 = OpAccessChain %200 %49 %201
%203 = OpLoad %15 %202
%204 = OpAccessChain %199 %51 %201
%205 = OpLoad %15 %204
%207 = OpCompositeExtract %9 %203 0
%208 = OpCompositeExtract %9 %205 0
%209 = OpFAdd %9 %207 %208
%210 = OpCompositeExtract %9 %203 1
%211 = OpCompositeExtract %9 %205 1
%212 = OpFAdd %9 %210 %211
%213 = OpCompositeExtract %9 %203 2
%214 = OpCompositeExtract %9 %205 2
%215 = OpFAdd %9 %213 %214
%206 = OpCompositeConstruct %15 %209 %212 %215
%216 = OpAccessChain %199 %54 %201
OpStore %216 %206
%220 = OpAccessChain %218 %49 %219
%221 = OpLoad %16 %220
%222 = OpAccessChain %217 %51 %219
%223 = OpLoad %16 %222
%225 = OpCompositeExtract %7 %221 0
%226 = OpCompositeExtract %7 %223 0
%227 = OpFAdd %7 %225 %226
%228 = OpCompositeExtract %7 %221 1
%229 = OpCompositeExtract %7 %223 1
%230 = OpFAdd %7 %228 %229
%231 = OpCompositeExtract %7 %221 2
%232 = OpCompositeExtract %7 %223 2
%233 = OpFAdd %7 %231 %232
%234 = OpCompositeExtract %7 %221 3
%235 = OpCompositeExtract %7 %223 3
%236 = OpFAdd %7 %234 %235
%224 = OpCompositeConstruct %16 %227 %230 %233 %236
%237 = OpAccessChain %217 %54 %219
OpStore %237 %224
%241 = OpAccessChain %239 %49 %240
%242 = OpLoad %17 %241
%243 = OpAccessChain %238 %51 %240
%244 = OpLoad %17 %243
%246 = OpCompositeExtract %8 %242 0
%247 = OpCompositeExtract %8 %244 0
%248 = OpFAdd %8 %246 %247
%249 = OpCompositeExtract %8 %242 1
%250 = OpCompositeExtract %8 %244 1
%251 = OpFAdd %8 %249 %250
%252 = OpCompositeExtract %8 %242 2
%253 = OpCompositeExtract %8 %244 2
%254 = OpFAdd %8 %252 %253
%255 = OpCompositeExtract %8 %242 3
%256 = OpCompositeExtract %8 %244 3
%257 = OpFAdd %8 %255 %256
%245 = OpCompositeConstruct %17 %248 %251 %254 %257
%258 = OpAccessChain %238 %54 %240
OpStore %258 %245
%262 = OpAccessChain %260 %49 %261
%263 = OpLoad %18 %262
%264 = OpAccessChain %259 %51 %261
%265 = OpLoad %18 %264
%267 = OpCompositeExtract %9 %263 0
%268 = OpCompositeExtract %9 %265 0
%269 = OpFAdd %9 %267 %268
%270 = OpCompositeExtract %9 %263 1
%271 = OpCompositeExtract %9 %265 1
%272 = OpFAdd %9 %270 %271
%273 = OpCompositeExtract %9 %263 2
%274 = OpCompositeExtract %9 %265 2
%275 = OpFAdd %9 %273 %274
%276 = OpCompositeExtract %9 %263 3
%277 = OpCompositeExtract %9 %265 3
%278 = OpFAdd %9 %276 %277
%266 = OpCompositeConstruct %18 %269 %272 %275 %278
%279 = OpAccessChain %259 %54 %261
OpStore %279 %266
%281 = OpAccessChain %280 %53 %48
%282 = OpLoad %20 %281
%283 = OpAccessChain %280 %55 %48
OpStore %283 %282
%284 = OpLoad %3 %58
%285 = OpExtInst %3 %1 FAbs %284
%286 = OpLoad %3 %58
%287 = OpFAdd %3 %286 %285
OpStore %58 %287
%288 = OpLoad %3 %58
%289 = OpLoad %3 %58
%290 = OpLoad %3 %58
%291 = OpExtInst %3 %1 FClamp %288 %289 %290
%292 = OpLoad %3 %58
%293 = OpFAdd %3 %292 %291
OpStore %58 %293
%294 = OpLoad %3 %58
%295 = OpCompositeConstruct %7 %294 %294
%296 = OpLoad %3 %58
%297 = OpCompositeConstruct %7 %296 %296
%298 = OpDot %3 %295 %297
%299 = OpLoad %3 %58
%300 = OpFAdd %3 %299 %298
OpStore %58 %300
%301 = OpLoad %3 %58
%302 = OpLoad %3 %58
%303 = OpExtInst %3 %1 FMax %301 %302
%304 = OpLoad %3 %58
%305 = OpFAdd %3 %304 %303
OpStore %58 %305
%306 = OpLoad %3 %58
%307 = OpLoad %3 %58
%308 = OpExtInst %3 %1 FMin %306 %307
%309 = OpLoad %3 %58
%310 = OpFAdd %3 %309 %308
OpStore %58 %310
%311 = OpLoad %3 %58
%312 = OpExtInst %3 %1 FSign %311
%313 = OpLoad %3 %58
%314 = OpFAdd %3 %313 %312
OpStore %58 %314
%315 = OpLoad %3 %58
%316 = OpFAdd %3 %315 %24
OpStore %58 %316
%317 = OpAccessChain %92 %49 %93
%318 = OpLoad %7 %317
%320 = OpFConvert %319 %318
%321 = OpFConvert %7 %320
%322 = OpAccessChain %91 %54 %93
OpStore %322 %321
%323 = OpAccessChain %101 %49 %102
%324 = OpLoad %8 %323
%326 = OpFConvert %325 %324
%327 = OpFConvert %8 %326
%328 = OpAccessChain %100 %54 %102
OpStore %328 %327
%329 = OpAccessChain %110 %49 %111
%330 = OpLoad %9 %329
%332 = OpFConvert %331 %330
%333 = OpFConvert %9 %332
%334 = OpAccessChain %109 %54 %111
OpStore %334 %333
%335 = OpAccessChain %119 %49 %120
%336 = OpLoad %10 %335
%338 = OpCompositeExtract %7 %336 0
%339 = OpFConvert %319 %338
%340 = OpCompositeExtract %7 %336 1
%341 = OpFConvert %319 %340
%342 = OpCompositeConstruct %337 %339 %341
%343 = OpCompositeExtract %319 %342 0
%344 = OpFConvert %7 %343
%345 = OpCompositeExtract %319 %342 1
%346 = OpFConvert %7 %345
%347 = OpCompositeConstruct %10 %344 %346
%348 = OpAccessChain %118 %54 %120
OpStore %348 %347
%349 = OpAccessChain %134 %49 %135
%350 = OpLoad %11 %349
%352 = OpCompositeExtract %8 %350 0
%353 = OpFConvert %325 %352
%354 = OpCompositeExtract %8 %350 1
%355 = OpFConvert %325 %354
%356 = OpCompositeConstruct %351 %353 %355
%357 = OpCompositeExtract %325 %356 0
%358 = OpFConvert %8 %357
%359 = OpCompositeExtract %325 %356 1
%360 = OpFConvert %8 %359
%361 = OpCompositeConstruct %11 %358 %360
%362 = OpAccessChain %133 %54 %135
OpStore %362 %361
%363 = OpAccessChain %149 %49 %150
%364 = OpLoad %12 %363
%366 = OpCompositeExtract %9 %364 0
%367 = OpFConvert %331 %366
%368 = OpCompositeExtract %9 %364 1
%369 = OpFConvert %331 %368
%370 = OpCompositeConstruct %365 %367 %369
%371 = OpCompositeExtract %331 %370 0
%372 = OpFConvert %9 %371
%373 = OpCompositeExtract %331 %370 1
%374 = OpFConvert %9 %373
%375 = OpCompositeConstruct %12 %372 %374
%376 = OpAccessChain %148 %54 %150
OpStore %376 %375
%377 = OpAccessChain %164 %49 %165
%378 = OpLoad %13 %377
%380 = OpCompositeExtract %7 %378 0
%381 = OpFConvert %319 %380
%382 = OpCompositeExtract %7 %378 1
%383 = OpFConvert %319 %382
%384 = OpCompositeExtract %7 %378 2
%385 = OpFConvert %319 %384
%386 = OpCompositeConstruct %379 %381 %383 %385
%387 = OpCompositeExtract %319 %386 0
%388 = OpFConvert %7 %387
%389 = OpCompositeExtract %319 %386 1
%390 = OpFConvert %7 %389
%391 = OpCompositeExtract %319 %386 2
%392 = OpFConvert %7 %391
%393 = OpCompositeConstruct %13 %388 %390 %392
%394 = OpAccessChain %163 %54 %165
OpStore %394 %393
%395 = OpAccessChain %182 %49 %183
%396 = OpLoad %14 %395
%398 = OpCompositeExtract %8 %396 0
%399 = OpFConvert %325 %398
%400 = OpCompositeExtract %8 %396 1
%401 = OpFConvert %325 %400
%402 = OpCompositeExtract %8 %396 2
%403 = OpFConvert %325 %402
%404 = OpCompositeConstruct %397 %399 %401 %403
%405 = OpCompositeExtract %325 %404 0
%406 = OpFConvert %8 %405
%407 = OpCompositeExtract %325 %404 1
%408 = OpFConvert %8 %407
%409 = OpCompositeExtract %325 %404 2
%410 = OpFConvert %8 %409
%411 = OpCompositeConstruct %14 %406 %408 %410
%412 = OpAccessChain %181 %54 %183
OpStore %412 %411
%413 = OpAccessChain %200 %49 %201
%414 = OpLoad %15 %413
%416 = OpCompositeExtract %9 %414 0
%417 = OpFConvert %331 %416
%418 = OpCompositeExtract %9 %414 1
%419 = OpFConvert %331 %418
%420 = OpCompositeExtract %9 %414 2
%421 = OpFConvert %331 %420
%422 = OpCompositeConstruct %415 %417 %419 %421
%423 = OpCompositeExtract %331 %422 0
%424 = OpFConvert %9 %423
%425 = OpCompositeExtract %331 %422 1
%426 = OpFConvert %9 %425
%427 = OpCompositeExtract %331 %422 2
%428 = OpFConvert %9 %427
%429 = OpCompositeConstruct %15 %424 %426 %428
%430 = OpAccessChain %199 %54 %201
OpStore %430 %429
%431 = OpAccessChain %218 %49 %219
%432 = OpLoad %16 %431
%434 = OpCompositeExtract %7 %432 0
%435 = OpFConvert %319 %434
%436 = OpCompositeExtract %7 %432 1
%437 = OpFConvert %319 %436
%438 = OpCompositeExtract %7 %432 2
%439 = OpFConvert %319 %438
%440 = OpCompositeExtract %7 %432 3
%441 = OpFConvert %319 %440
%442 = OpCompositeConstruct %433 %435 %437 %439 %441
%443 = OpCompositeExtract %319 %442 0
%444 = OpFConvert %7 %443
%445 = OpCompositeExtract %319 %442 1
%446 = OpFConvert %7 %445
%447 = OpCompositeExtract %319 %442 2
%448 = OpFConvert %7 %447
%449 = OpCompositeExtract %319 %442 3
%450 = OpFConvert %7 %449
%451 = OpCompositeConstruct %16 %444 %446 %448 %450
%452 = OpAccessChain %217 %54 %219
OpStore %452 %451
%453 = OpAccessChain %239 %49 %240
%454 = OpLoad %17 %453
%456 = OpCompositeExtract %8 %454 0
%457 = OpFConvert %325 %456
%458 = OpCompositeExtract %8 %454 1
%459 = OpFConvert %325 %458
%460 = OpCompositeExtract %8 %454 2
%461 = OpFConvert %325 %460
%462 = OpCompositeExtract %8 %454 3
%463 = OpFConvert %325 %462
%464 = OpCompositeConstruct %455 %457 %459 %461 %463
%465 = OpCompositeExtract %325 %464 0
%466 = OpFConvert %8 %465
%467 = OpCompositeExtract %325 %464 1
%468 = OpFConvert %8 %467
%469 = OpCompositeExtract %325 %464 2
%470 = OpFConvert %8 %469
%471 = OpCompositeExtract %325 %464 3
%472 = OpFConvert %8 %471
%473 = OpCompositeConstruct %17 %466 %468 %470 %472
%474 = OpAccessChain %238 %54 %240
OpStore %474 %473
%475 = OpAccessChain %260 %49 %261
%476 = OpLoad %18 %475
%478 = OpCompositeExtract %9 %476 0
%479 = OpFConvert %331 %478
%480 = OpCompositeExtract %9 %476 1
%481 = OpFConvert %331 %480
%482 = OpCompositeExtract %9 %476 2
%483 = OpFConvert %331 %482
%484 = OpCompositeExtract %9 %476 3
%485 = OpFConvert %331 %484
%486 = OpCompositeConstruct %477 %479 %481 %483 %485
%487 = OpCompositeExtract %331 %486 0
%488 = OpFConvert %9 %487
%489 = OpCompositeExtract %331 %486 1
%490 = OpFConvert %9 %489
%491 = OpCompositeExtract %331 %486 2
%492 = OpFConvert %9 %491
%493 = OpCompositeExtract %331 %486 3
%494 = OpFConvert %9 %493
%495 = OpCompositeConstruct %18 %488 %490 %492 %494
%496 = OpAccessChain %259 %54 %261
OpStore %496 %495
%497 = OpLoad %3 %58
OpReturnValue %497
OpFunctionEnd
%499 = OpFunction %2 None %500
%498 = OpLabel
%501 = OpAccessChain %47 %28 %48
%502 = OpAccessChain %50 %31 %48
%503 = OpAccessChain %52 %34 %48
%504 = OpAccessChain %50 %37 %48
%505 = OpAccessChain %52 %40 %48
OpBranch %507
%507 = OpLabel
%508 = OpFunctionCall %3 %45 %506
%510 = OpAccessChain %84 %504 %509
OpStore %510 %508
OpReturn
OpFunctionEnd

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
struct PushConstants {
index: u32,
double: vec2<f64>,
double: vec2<f32>,
}
struct FragmentIn {

View File

@@ -0,0 +1,47 @@
enable f16;
struct A {
a_1_: f16,
a_vec2_: vec2<f16>,
a_vec3_: vec3<f16>,
a_vec4_: vec4<f16>,
}
struct B {
b_1_: f16,
b_vec2_: vec2<f16>,
b_vec3_: vec3<f16>,
b_vec4_: vec4<f16>,
b_mat2_: mat2x2<f16>,
b_mat2x3_: mat2x3<f16>,
b_mat2x4_: mat2x4<f16>,
b_mat3x2_: mat3x2<f16>,
b_mat3_: mat3x3<f16>,
b_mat3x4_: mat3x4<f16>,
b_mat4x2_: mat4x2<f16>,
b_mat4x3_: mat4x3<f16>,
b_mat4_: mat4x4<f16>,
}
@group(0) @binding(0)
var<uniform> global: A;
@group(0) @binding(1)
var<storage, read_write> global_1: B;
fn main_1() {
let _e16 = global.a_1_;
global_1.b_1_ = _e16;
let _e17 = global.a_vec2_;
global_1.b_vec2_ = _e17;
let _e18 = global.a_vec3_;
global_1.b_vec3_ = _e18;
let _e19 = global.a_vec4_;
global_1.b_vec4_ = _e19;
return;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
main_1();
return;
}

View File

@@ -0,0 +1,46 @@
enable f16;
struct B {
b_1_: f16,
b_vec2_: vec2<f16>,
b_vec3_: vec3<f16>,
b_vec4_: vec4<f16>,
b_mat2_: mat2x2<f16>,
b_mat2x3_: mat2x3<f16>,
b_mat2x4_: mat2x4<f16>,
b_mat3x2_: mat3x2<f16>,
b_mat3_: mat3x3<f16>,
b_mat3x4_: mat3x4<f16>,
b_mat4x2_: mat4x2<f16>,
b_mat4x3_: mat4x3<f16>,
b_mat4_: mat4x4<f16>,
}
struct A {
a_1_: f16,
a_vec2_: vec2<f16>,
a_vec3_: vec3<f16>,
a_vec4_: vec4<f16>,
}
@group(0) @binding(1)
var<storage, read_write> unnamed: B;
@group(0) @binding(0)
var<uniform> unnamed_1: A;
fn main_1() {
let _e3 = unnamed_1.a_1_;
unnamed.b_1_ = _e3;
let _e6 = unnamed_1.a_vec2_;
unnamed.b_vec2_ = _e6;
let _e9 = unnamed_1.a_vec3_;
unnamed.b_vec3_ = _e9;
let _e12 = unnamed_1.a_vec4_;
unnamed.b_vec4_ = _e12;
return;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
main_1();
}

View File

@@ -0,0 +1,167 @@
enable f16;
struct UniformCompatible {
val_u32_: u32,
val_i32_: i32,
val_f32_: f32,
val_f16_: f16,
val_f16_2_: vec2<f16>,
val_f16_3_: vec3<f16>,
val_f16_4_: vec4<f16>,
final_value: f16,
val_mat2x2_: mat2x2<f16>,
val_mat2x3_: mat2x3<f16>,
val_mat2x4_: mat2x4<f16>,
val_mat3x2_: mat3x2<f16>,
val_mat3x3_: mat3x3<f16>,
val_mat3x4_: mat3x4<f16>,
val_mat4x2_: mat4x2<f16>,
val_mat4x3_: mat4x3<f16>,
val_mat4x4_: mat4x4<f16>,
}
struct StorageCompatible {
val_f16_array_2_: array<f16, 2>,
}
struct LayoutTest {
scalar1_: f16,
scalar2_: f16,
v3_: vec3<f16>,
tuck_in: f16,
scalar4_: f16,
larger: u32,
}
const constant_variable: f16 = 15.203125h;
var<private> private_variable: f16 = 1h;
@group(0) @binding(0)
var<uniform> input_uniform: UniformCompatible;
@group(0) @binding(1)
var<storage> input_storage: UniformCompatible;
@group(0) @binding(2)
var<storage> input_arrays: StorageCompatible;
@group(0) @binding(3)
var<storage, read_write> output: UniformCompatible;
@group(0) @binding(4)
var<storage, read_write> output_arrays: StorageCompatible;
fn f16_function(x: f16) -> f16 {
var val: f16 = 15.203125h;
let _e4 = val;
val = (_e4 + -33344h);
let _e6 = val;
let _e9 = val;
val = (_e9 + (_e6 + 5h));
let _e13 = input_uniform.val_f32_;
let _e14 = val;
let _e18 = val;
val = (_e18 + f16((_e13 + f32(_e14))));
let _e22 = input_uniform.val_f16_;
let _e25 = val;
val = (_e25 + vec3(_e22).z);
let _e31 = input_uniform.val_f16_;
let _e34 = input_storage.val_f16_;
output.val_f16_ = (_e31 + _e34);
let _e40 = input_uniform.val_f16_2_;
let _e43 = input_storage.val_f16_2_;
output.val_f16_2_ = (_e40 + _e43);
let _e49 = input_uniform.val_f16_3_;
let _e52 = input_storage.val_f16_3_;
output.val_f16_3_ = (_e49 + _e52);
let _e58 = input_uniform.val_f16_4_;
let _e61 = input_storage.val_f16_4_;
output.val_f16_4_ = (_e58 + _e61);
let _e67 = input_uniform.val_mat2x2_;
let _e70 = input_storage.val_mat2x2_;
output.val_mat2x2_ = (_e67 + _e70);
let _e76 = input_uniform.val_mat2x3_;
let _e79 = input_storage.val_mat2x3_;
output.val_mat2x3_ = (_e76 + _e79);
let _e85 = input_uniform.val_mat2x4_;
let _e88 = input_storage.val_mat2x4_;
output.val_mat2x4_ = (_e85 + _e88);
let _e94 = input_uniform.val_mat3x2_;
let _e97 = input_storage.val_mat3x2_;
output.val_mat3x2_ = (_e94 + _e97);
let _e103 = input_uniform.val_mat3x3_;
let _e106 = input_storage.val_mat3x3_;
output.val_mat3x3_ = (_e103 + _e106);
let _e112 = input_uniform.val_mat3x4_;
let _e115 = input_storage.val_mat3x4_;
output.val_mat3x4_ = (_e112 + _e115);
let _e121 = input_uniform.val_mat4x2_;
let _e124 = input_storage.val_mat4x2_;
output.val_mat4x2_ = (_e121 + _e124);
let _e130 = input_uniform.val_mat4x3_;
let _e133 = input_storage.val_mat4x3_;
output.val_mat4x3_ = (_e130 + _e133);
let _e139 = input_uniform.val_mat4x4_;
let _e142 = input_storage.val_mat4x4_;
output.val_mat4x4_ = (_e139 + _e142);
let _e148 = input_arrays.val_f16_array_2_;
output_arrays.val_f16_array_2_ = _e148;
let _e149 = val;
let _e151 = val;
val = (_e151 + abs(_e149));
let _e153 = val;
let _e154 = val;
let _e155 = val;
let _e157 = val;
val = (_e157 + clamp(_e153, _e154, _e155));
let _e159 = val;
let _e161 = val;
let _e164 = val;
val = (_e164 + dot(vec2(_e159), vec2(_e161)));
let _e166 = val;
let _e167 = val;
let _e169 = val;
val = (_e169 + max(_e166, _e167));
let _e171 = val;
let _e172 = val;
let _e174 = val;
val = (_e174 + min(_e171, _e172));
let _e176 = val;
let _e178 = val;
val = (_e178 + sign(_e176));
let _e181 = val;
val = (_e181 + 1h);
let _e185 = input_uniform.val_f16_2_;
let float_vec2_ = vec2<f32>(_e185);
output.val_f16_2_ = vec2<f16>(float_vec2_);
let _e192 = input_uniform.val_f16_3_;
let float_vec3_ = vec3<f32>(_e192);
output.val_f16_3_ = vec3<f16>(float_vec3_);
let _e199 = input_uniform.val_f16_4_;
let float_vec4_ = vec4<f32>(_e199);
output.val_f16_4_ = vec4<f16>(float_vec4_);
let _e208 = input_uniform.val_mat2x2_;
output.val_mat2x2_ = mat2x2<f16>(mat2x2<f32>(_e208));
let _e215 = input_uniform.val_mat2x3_;
output.val_mat2x3_ = mat2x3<f16>(mat2x3<f32>(_e215));
let _e222 = input_uniform.val_mat2x4_;
output.val_mat2x4_ = mat2x4<f16>(mat2x4<f32>(_e222));
let _e229 = input_uniform.val_mat3x2_;
output.val_mat3x2_ = mat3x2<f16>(mat3x2<f32>(_e229));
let _e236 = input_uniform.val_mat3x3_;
output.val_mat3x3_ = mat3x3<f16>(mat3x3<f32>(_e236));
let _e243 = input_uniform.val_mat3x4_;
output.val_mat3x4_ = mat3x4<f16>(mat3x4<f32>(_e243));
let _e250 = input_uniform.val_mat4x2_;
output.val_mat4x2_ = mat4x2<f16>(mat4x2<f32>(_e250));
let _e257 = input_uniform.val_mat4x3_;
output.val_mat4x3_ = mat4x3<f16>(mat4x3<f32>(_e257));
let _e264 = input_uniform.val_mat4x4_;
output.val_mat4x4_ = mat4x4<f16>(mat4x4<f32>(_e264));
let _e267 = val;
return _e267;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
let _e3 = f16_function(2h);
output.final_value = _e3;
return;
}

View File

@@ -208,3 +208,8 @@ fn int64() {
"#,
);
}
#[test]
fn float16() {
require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
}

View File

@@ -8,7 +8,8 @@ use naga::valid::Capabilities;
#[track_caller]
fn check(input: &str, snapshot: &str) {
let output = naga::front::wgsl::parse_str(input)
.expect_err("expected parser error")
.map(|_| panic!("expected parser error, but parsing succeeded!"))
.unwrap_err()
.emit_to_string(input);
if output != snapshot {
for diff in diff::lines(snapshot, &output) {
@@ -22,6 +23,19 @@ fn check(input: &str, snapshot: &str) {
}
}
#[track_caller]
fn check_success(input: &str) {
match naga::front::wgsl::parse_str(input) {
Ok(_) => {}
Err(err) => {
panic!(
"expected success, but parsing failed with:\n{}",
err.emit_to_string(input)
);
}
}
}
#[test]
fn very_negative_integers() {
// wgpu#4492
@@ -827,6 +841,50 @@ fn matrix_constructor_inferred() {
);
}
#[test]
fn float16_requires_enable() {
check(
r#"
const a: f16 = 1.0;
"#,
r#"error: the `f16` enable extension is not enabled
┌─ wgsl:2:22
2 │ const a: f16 = 1.0;
│ ^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
= note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
"#,
);
check(
r#"
const a = 1.0h;
"#,
r#"error: the `f16` enable extension is not enabled
┌─ wgsl:2:23
2 │ const a = 1.0h;
│ ^^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
= note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
"#,
);
}
#[test]
fn multiple_enables_valid() {
check_success(
r#"
enable f16;
enable f16;
const a: f16 = 1.0h;
"#,
);
}
/// Check the result of validating a WGSL program against a pattern.
///
/// Unless you are generating code programmatically, the
@@ -932,6 +990,53 @@ fn int64_capability() {
}
}
#[test]
fn float16_capability() {
check_validation! {
"enable f16; var input: f16;",
"enable f16; var input: vec2<f16>;":
Err(naga::valid::ValidationError::Type {
source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}),
..
})
}
}
#[test]
fn float16_in_push_constant() {
check_validation! {
"enable f16; var<push_constant> input: f16;",
"enable f16; var<push_constant> input: vec2<f16>;",
"enable f16; var<push_constant> input: mat4x4<f16>;",
"enable f16; struct S { a: f16 }; var<push_constant> input: S;",
"enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var<push_constant> input: S2;":
Err(naga::valid::ValidationError::GlobalVariable {
source: naga::valid::GlobalVariableError::InvalidPushConstantType(
naga::valid::PushConstantError::InvalidScalar(
naga::Scalar::F16
)
),
..
}),
naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT
}
}
#[test]
fn float16_in_atomic() {
check_validation! {
"enable f16; var<storage> a: atomic<f16>;":
Err(naga::valid::ValidationError::Type {
source: naga::valid::TypeError::InvalidAtomicWidth(
naga::ScalarKind::Float,
2
),
..
}),
naga::valid::Capabilities::SHADER_FLOAT16
}
}
#[test]
fn invalid_arrays() {
check_validation! {
@@ -1356,11 +1461,13 @@ fn invalid_blend_src() {
@fragment
fn main(@builtin(position) position: vec4<f32>) -> FragmentOutput { return FragmentOutput(vec4(0.0), vec4(0.0)); }
",
r###"error: `dual_source_blending` enable-extension is not enabled
r###"error: the `dual_source_blending` enable extension is not enabled
┌─ wgsl:3:27
3 │ @location(0) @blend_src(0) output0: vec4<f32>,
│ ^^^^^^^^^ the `dual_source_blending` enable-extension is needed for this functionality, but it is not currently enabled
│ ^^^^^^^^^ the `dual_source_blending` "Enable Extension" is needed for this functionality, but it is not currently enabled.
= note: You can enable this extension by adding `enable dual_source_blending;` at the top of the shader, before any other items.
"###,
);

View File

@@ -49,6 +49,7 @@ cfg-if.workspace = true
ctor.workspace = true
futures-lite.workspace = true
glam.workspace = true
half = { workspace = true, features = ["bytemuck"] }
itertools.workspace = true
image.workspace = true
libtest-mimic.workspace = true

View File

@@ -143,8 +143,8 @@ impl ShaderTest {
body,
input_type: String::from("CustomStruct"),
output_type: String::from("array<u32>"),
input_values: bytemuck::cast_slice(input_values).to_vec(),
output_values: vec![bytemuck::cast_slice(output_values).to_vec()],
input_values: bytemuck::pod_collect_to_vec(input_values),
output_values: vec![bytemuck::pod_collect_to_vec(output_values)],
output_comparison_fn: Self::default_comparison_function::<O>,
output_initialization: u32::MAX,
failures: Backends::empty(),

View File

@@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits};
use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
#[gpu_test]
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
// Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
.expect_fail(
FailureCase::backend(wgpu::Backends::VULKAN)
.validation_error("a matrix with stride 8 not satisfying alignment to 16"),
)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Uniform,
create_struct_layout_tests(InputStorageType::Uniform),
)
});
#[gpu_test]
static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_struct_layout_tests(InputStorageType::Storage),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_struct_layout_tests(InputStorageType::PushConstant),
)
});
fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
@@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
tests
}
#[gpu_test]
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
#[gpu_test]
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_64bit_struct_layout_tests(),
)
});
fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
@@ -356,29 +461,10 @@ fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
}
#[gpu_test]
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
// Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
.expect_fail(
FailureCase::backend(wgpu::Backends::VULKAN)
.validation_error("a matrix with stride 8 not satisfying alignment to 16"),
)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Uniform,
create_struct_layout_tests(InputStorageType::Uniform),
)
});
#[gpu_test]
static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_F16)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
@@ -386,34 +472,15 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_struct_layout_tests(InputStorageType::Storage),
create_16bit_struct_layout_test(),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_struct_layout_tests(InputStorageType::PushConstant),
)
});
#[gpu_test]
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.features(Features::SHADER_F16)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
@@ -421,41 +488,138 @@ static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
create_16bit_struct_layout_test(),
)
});
#[gpu_test]
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
let mut tests = Vec::new();
#[gpu_test]
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_64bit_struct_layout_tests(),
)
});
fn f16asu16(f32: f32) -> u16 {
half::f16::from_f32(f32).to_bits()
}
// 16 bit alignment tests
{
let members =
"scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32";
let direct = String::from(
"\
output[0] = u32(input.scalar1);
output[1] = u32(input.scalar2);
output[2] = u32(input.v3.x);
output[3] = u32(input.v3.y);
output[4] = u32(input.v3.z);
output[5] = u32(input.tuck_in);
output[6] = u32(input.scalar4);
output[7] = u32(extractBits(input.larger, 0u, 16u));
output[8] = u32(extractBits(input.larger, 16u, 16u));
",
);
tests.push(ShaderTest::new(
"f16 alignment".into(),
members.into(),
direct,
&[
f16asu16(0.0),
f16asu16(1.0),
f16asu16(2.0),
f16asu16(3.0),
f16asu16(4.0),
f16asu16(5.0),
f16asu16(6.0),
f16asu16(7.0),
f16asu16(8.0),
f16asu16(9.0),
10_u16,
11_u16,
// Some extra values to help debug if the test fails.
12_u16,
13_u16,
14_u16,
15_u16,
16_u16,
17_u16,
18_u16,
19_u16,
20_u16,
],
&[
0, // scalar1
1, // scalar2
4, 5, 6, // v3
7, // tuck_in
8, // scalar4
10, // larger[0..16]
11, // larger[16..32]
],
));
}
// Matrix tests
{
let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h";
let direct = String::from(
"\
output[0] = u32(input.m2[0].x);
output[1] = u32(input.m2[0].y);
output[2] = u32(input.m2[1].x);
output[3] = u32(input.m2[1].y);
output[4] = u32(input.m3[0].x);
output[5] = u32(input.m3[0].y);
output[6] = u32(input.m3[0].z);
output[7] = u32(input.m3[1].x);
output[8] = u32(input.m3[1].y);
output[9] = u32(input.m3[1].z);
output[10] = u32(input.m3[2].x);
output[11] = u32(input.m3[2].y);
output[12] = u32(input.m3[2].z);
output[13] = u32(input.m4[0].x);
output[14] = u32(input.m4[0].y);
output[15] = u32(input.m4[0].z);
output[16] = u32(input.m4[0].w);
output[17] = u32(input.m4[1].x);
output[18] = u32(input.m4[1].y);
output[19] = u32(input.m4[1].z);
output[20] = u32(input.m4[1].w);
output[21] = u32(input.m4[2].x);
output[22] = u32(input.m4[2].y);
output[23] = u32(input.m4[2].z);
output[24] = u32(input.m4[2].w);
output[25] = u32(input.m4[3].x);
output[26] = u32(input.m4[3].y);
output[27] = u32(input.m4[3].z);
output[28] = u32(input.m4[3].w);
",
);
tests.push(ShaderTest::new(
"f16 matrix alignment".into(),
members.into(),
direct,
&(0..32).map(|x| f16asu16(x as f32)).collect::<Vec<_>>(),
&[
0, 1, // m2[0]
2, 3, // m2[1]
//
4, 5, 6, // m3[0]
8, 9, 10, // m3[1]
12, 13, 14, // m3[2]
//
16, 17, 18, 19, // m4[0]
20, 21, 22, 23, // m4[1]
24, 25, 26, 27, // m4[2]
28, 29, 30, 31, // m4[3]
],
));
}
// Insert `enable f16;` header
tests
.into_iter()
.map(|test| test.header("enable f16;".into()))
.collect()
}

View File

@@ -383,6 +383,10 @@ pub fn create_validator(
features.contains(wgt::Features::PUSH_CONSTANTS),
);
caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
caps.set(
Caps::SHADER_FLOAT16,
features.contains(wgt::Features::SHADER_F16),
);
caps.set(
Caps::PRIMITIVE_INDEX,
features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),

View File

@@ -425,6 +425,23 @@ impl super::Adapter {
&& features1.Int64ShaderOps.as_bool(),
);
let float16_supported = {
let mut features4 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4::default();
let hr = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax
ptr::from_mut(&mut features4).cast(),
size_of::<Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4>() as _,
)
};
hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool()
};
features.set(
wgt::Features::SHADER_F16,
shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported,
);
features.set(
wgt::Features::TEXTURE_INT64_ATOMIC,
shader_model >= naga::back::hlsl::ShaderModel::V6_6
@@ -612,7 +629,7 @@ impl crate::Adapter for super::Adapter {
unsafe fn open(
&self,
_features: wgt::Features,
features: wgt::Features,
limits: &wgt::Limits,
memory_hints: &wgt::MemoryHints,
) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
@@ -633,6 +650,7 @@ impl crate::Adapter for super::Adapter {
let device = super::Device::new(
self.device.clone(),
queue.clone(),
features,
limits,
memory_hints,
self.private_caps,

View File

@@ -33,9 +33,11 @@ use crate::{
const NAGA_LOCATION_SEMANTIC: &[u8] = c"LOC".to_bytes();
impl super::Device {
#[allow(clippy::too_many_arguments)]
pub(super) fn new(
raw: Direct3D12::ID3D12Device,
present_queue: Direct3D12::ID3D12CommandQueue,
features: wgt::Features,
limits: &wgt::Limits,
memory_hints: &wgt::MemoryHints,
private_caps: super::PrivateCapabilities,
@@ -178,6 +180,7 @@ impl super::Device {
event: Event::create(false, false)?,
},
private_caps,
features,
shared: Arc::new(shared),
rtv_pool: Mutex::new(rtv_pool),
dsv_pool: Mutex::new(descriptor::CpuPool::new(

View File

@@ -634,6 +634,7 @@ pub struct Device {
present_queue: Direct3D12::ID3D12CommandQueue,
idler: Idler,
private_caps: PrivateCapabilities,
features: wgt::Features,
shared: Arc<DeviceShared>,
// CPU only pools
rtv_pool: Mutex<descriptor::CpuPool>,

View File

@@ -268,7 +268,7 @@ pub(super) fn compile_dxc(
let raw_ep = OPCWSTR::new(raw_ep);
let full_stage = OPCWSTR::new(full_stage);
let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 12>::new_const();
let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 13>::new_const();
if let Some(source_name) = source_name.as_ref() {
compile_args.push(source_name.ptr())
@@ -298,6 +298,10 @@ pub(super) fn compile_dxc(
compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS);
}
if device.features.contains(wgt::Features::SHADER_F16) {
compile_args.push(windows::core::w!("-enable-16bit-types"));
}
let buffer = Dxc::DxcBuffer {
Ptr: source.as_ptr().cast(),
Size: source.len(),

View File

@@ -399,6 +399,7 @@ impl PhysicalDeviceFeatures {
vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
vk::PhysicalDevice16BitStorageFeatures::default()
.storage_buffer16_bit_access(true)
.storage_input_output16(true)
.uniform_and_storage_buffer16_bit_access(true),
))
} else {
@@ -720,7 +721,8 @@ impl PhysicalDeviceFeatures {
F::SHADER_F16,
f16_i8.shader_float16 != 0
&& bit16.storage_buffer16_bit_access != 0
&& bit16.uniform_and_storage_buffer16_bit_access != 0,
&& bit16.uniform_and_storage_buffer16_bit_access != 0
&& bit16.storage_input_output16 != 0,
);
}
@@ -1968,6 +1970,10 @@ impl super::Adapter {
capabilities.push(spv::Capability::Int64);
}
if features.contains(wgt::Features::SHADER_F16) {
capabilities.push(spv::Capability::Float16);
}
if features.intersects(
wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
| wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX

View File

@@ -1340,18 +1340,20 @@ bitflags_array! {
/// This is a web and native feature.
const INDIRECT_FIRST_INSTANCE = WEBGPU_FEATURE_INDIRECT_FIRST_INSTANCE;
/// Allows shaders to acquire the FP16 ability
/// Allows shaders to use 16-bit floating point types. You may use them uniform buffers,
/// storage buffers, and local variables. You may not use them in push constants.
///
/// Note: this is not supported in `naga` yet, only through `spirv-passthrough` right now.
/// In order to use this in WGSL shaders, you must add `enable f16;` to the top of your shader,
/// before any global items.
///
/// Supported Platforms:
/// - Vulkan
/// - Metal
/// - DX12
///
/// This is a web and native feature.
const SHADER_F16 = WEBGPU_FEATURE_SHADER_F16;
/// Allows for usage of textures of format [`TextureFormat::Rg11b10Ufloat`] as a render target
///
/// Supported platforms: