mirror of
https://github.com/gfx-rs/wgpu.git
synced 2026-01-09 14:48:08 -05:00
feat: implement F16 support in shaders (#5701)
Co-authored-by: FL33TW00D <fleetwoodpersonal@gmail.com> Co-authored-by: Connor Fitzgerald <connorwadefitzgerald@gmail.com> Co-authored-by: ErichDonGubler <erichdongubler@gmail.com>
This commit is contained in:
committed by
GitHub
parent
b912232188
commit
c6286791fe
14
Cargo.lock
generated
14
Cargo.lock
generated
@@ -1785,8 +1785,12 @@ version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"bytemuck",
|
||||
"cfg-if",
|
||||
"crunchy",
|
||||
"num-traits",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2193,6 +2197,12 @@ dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||
|
||||
[[package]]
|
||||
name = "libredox"
|
||||
version = "0.1.3"
|
||||
@@ -2393,12 +2403,14 @@ dependencies = [
|
||||
"codespan-reporting",
|
||||
"diff",
|
||||
"env_logger",
|
||||
"half",
|
||||
"hashbrown",
|
||||
"hexf-parse",
|
||||
"hlsl-snapshots",
|
||||
"indexmap",
|
||||
"itertools 0.13.0",
|
||||
"log",
|
||||
"num-traits",
|
||||
"petgraph 0.7.1",
|
||||
"pp-rs",
|
||||
"ron",
|
||||
@@ -2574,6 +2586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4843,6 +4856,7 @@ dependencies = [
|
||||
"env_logger",
|
||||
"futures-lite",
|
||||
"glam",
|
||||
"half",
|
||||
"image",
|
||||
"itertools 0.13.0",
|
||||
"js-sys",
|
||||
|
||||
@@ -108,6 +108,7 @@ fern = "0.7"
|
||||
flume = "0.11"
|
||||
futures-lite = "2"
|
||||
glam = "0.29"
|
||||
half = "2.5" # We require 2.5 to have `Arbitrary` support.
|
||||
hashbrown = { version = "0.14.5", default-features = false, features = [
|
||||
"ahash",
|
||||
"inline-more",
|
||||
|
||||
@@ -41,10 +41,17 @@ msl-out = []
|
||||
## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`.
|
||||
msl-out-if-target-apple = []
|
||||
|
||||
serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"]
|
||||
serialize = [
|
||||
"dep:serde",
|
||||
"bitflags/serde",
|
||||
"half/serde",
|
||||
"hashbrown/serde",
|
||||
"indexmap/serde",
|
||||
]
|
||||
deserialize = [
|
||||
"dep:serde",
|
||||
"bitflags/serde",
|
||||
"half/serde",
|
||||
"hashbrown/serde",
|
||||
"indexmap/serde",
|
||||
]
|
||||
@@ -84,9 +91,12 @@ termcolor = { version = "1.4.1" }
|
||||
# https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
|
||||
codespan-reporting = { version = "0.11.0" }
|
||||
hashbrown.workspace = true
|
||||
half = { workspace = true, features = ["arbitrary", "num-traits"] }
|
||||
rustc-hash.workspace = true
|
||||
indexmap.workspace = true
|
||||
log = "0.4"
|
||||
# `half` requires 0.2.16 for `FromBytes` and `ToBytes`.
|
||||
num-traits = "0.2.16"
|
||||
strum = { workspace = true, optional = true }
|
||||
spirv = { version = "0.3", optional = true }
|
||||
thiserror.workspace = true
|
||||
|
||||
@@ -2703,6 +2703,9 @@ impl<'a, W: Write> Writer<'a, W> {
|
||||
// decimal part even it's zero which is needed for a valid glsl float constant
|
||||
crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?,
|
||||
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
|
||||
crate::Literal::F16(_) => {
|
||||
return Err(Error::Custom("GLSL has no 16-bit float type".into()));
|
||||
}
|
||||
// Unsigned integers need a `u` at the end
|
||||
//
|
||||
// While `core` doesn't necessarily need it, it's allowed and since `es` needs it we
|
||||
|
||||
@@ -2628,6 +2628,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
// decimal part even it's zero
|
||||
crate::Literal::F64(value) => write!(self.out, "{value:?}L")?,
|
||||
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
|
||||
crate::Literal::F16(value) => write!(self.out, "{value:?}h")?,
|
||||
crate::Literal::U32(value) => write!(self.out, "{value}u")?,
|
||||
// HLSL has no suffix for explicit i32 literals, but not using any suffix
|
||||
// makes the type ambiguous which prevents overload resolution from
|
||||
|
||||
@@ -8,6 +8,9 @@ use core::{
|
||||
fmt::{Display, Error as FmtError, Formatter, Write},
|
||||
iter,
|
||||
};
|
||||
use num_traits::real::Real as _;
|
||||
|
||||
use half::f16;
|
||||
|
||||
use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo};
|
||||
use crate::{
|
||||
@@ -182,9 +185,11 @@ impl Display for TypeContext<'_> {
|
||||
write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
|
||||
}
|
||||
crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
|
||||
crate::TypeInner::Matrix { columns, rows, .. } => {
|
||||
put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
|
||||
}
|
||||
crate::TypeInner::Matrix {
|
||||
columns,
|
||||
rows,
|
||||
scalar,
|
||||
} => put_numeric_type(out, scalar, &[rows, columns]),
|
||||
crate::TypeInner::Pointer { base, space } => {
|
||||
let sub = Self {
|
||||
handle: base,
|
||||
@@ -425,8 +430,12 @@ impl crate::Scalar {
|
||||
match self {
|
||||
Self {
|
||||
kind: Sk::Float,
|
||||
width: _,
|
||||
width: 4,
|
||||
} => "float",
|
||||
Self {
|
||||
kind: Sk::Float,
|
||||
width: 2,
|
||||
} => "half",
|
||||
Self {
|
||||
kind: Sk::Sint,
|
||||
width: 4,
|
||||
@@ -483,7 +492,7 @@ fn should_pack_struct_member(
|
||||
match *ty_inner {
|
||||
crate::TypeInner::Vector {
|
||||
size: crate::VectorSize::Tri,
|
||||
scalar: scalar @ crate::Scalar { width: 4, .. },
|
||||
scalar: scalar @ crate::Scalar { width: 4 | 2, .. },
|
||||
} if is_tight => Some(scalar),
|
||||
_ => None,
|
||||
}
|
||||
@@ -1459,6 +1468,21 @@ impl<W: Write> Writer<W> {
|
||||
crate::Literal::F64(_) => {
|
||||
return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
|
||||
}
|
||||
crate::Literal::F16(value) => {
|
||||
if value.is_infinite() {
|
||||
let sign = if value.is_sign_negative() { "-" } else { "" };
|
||||
write!(self.out, "{sign}INFINITY")?;
|
||||
} else if value.is_nan() {
|
||||
write!(self.out, "NAN")?;
|
||||
} else {
|
||||
let suffix = if value.fract() == f16::from_f32(0.0) {
|
||||
".0h"
|
||||
} else {
|
||||
"h"
|
||||
};
|
||||
write!(self.out, "{value}{suffix}")?;
|
||||
}
|
||||
}
|
||||
crate::Literal::F32(value) => {
|
||||
if value.is_infinite() {
|
||||
let sign = if value.is_sign_negative() { "-" } else { "" };
|
||||
|
||||
@@ -4,6 +4,7 @@ Implementations for `BlockContext` methods.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use arrayvec::ArrayVec;
|
||||
use spirv::Word;
|
||||
|
||||
use super::{
|
||||
@@ -1612,159 +1613,7 @@ impl BlockContext<'_> {
|
||||
expr,
|
||||
kind,
|
||||
convert,
|
||||
} => {
|
||||
use crate::ScalarKind as Sk;
|
||||
|
||||
let expr_id = self.cached[expr];
|
||||
let (src_scalar, src_size, is_matrix) =
|
||||
match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) {
|
||||
crate::TypeInner::Scalar(scalar) => (scalar, None, false),
|
||||
crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false),
|
||||
crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true),
|
||||
ref other => {
|
||||
log::error!("As source {:?}", other);
|
||||
return Err(Error::Validation("Unexpected Expression::As source"));
|
||||
}
|
||||
};
|
||||
|
||||
enum Cast {
|
||||
Identity,
|
||||
Unary(spirv::Op),
|
||||
Binary(spirv::Op, Word),
|
||||
Ternary(spirv::Op, Word, Word),
|
||||
}
|
||||
|
||||
let cast = if is_matrix {
|
||||
// we only support identity casts for matrices
|
||||
Cast::Unary(spirv::Op::CopyObject)
|
||||
} else {
|
||||
match (src_scalar.kind, kind, convert) {
|
||||
// Filter out identity casts. Some Adreno drivers are
|
||||
// confused by no-op OpBitCast instructions.
|
||||
(src_kind, kind, convert)
|
||||
if src_kind == kind
|
||||
&& convert.filter(|&width| width != src_scalar.width).is_none() =>
|
||||
{
|
||||
Cast::Identity
|
||||
}
|
||||
(Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
|
||||
(_, _, None) => Cast::Unary(spirv::Op::Bitcast),
|
||||
// casting to a bool - generate `OpXxxNotEqual`
|
||||
(_, Sk::Bool, Some(_)) => {
|
||||
let op = match src_scalar.kind {
|
||||
Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
|
||||
Sk::Float => spirv::Op::FUnordNotEqual,
|
||||
Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
|
||||
};
|
||||
let zero_scalar_id =
|
||||
self.writer.get_constant_scalar_with(0, src_scalar)?;
|
||||
let zero_id = match src_size {
|
||||
Some(size) => {
|
||||
let ty = LocalType::Numeric(NumericType::Vector {
|
||||
size,
|
||||
scalar: src_scalar,
|
||||
})
|
||||
.into();
|
||||
|
||||
self.temp_list.clear();
|
||||
self.temp_list.resize(size as _, zero_scalar_id);
|
||||
|
||||
self.writer.get_constant_composite(ty, &self.temp_list)
|
||||
}
|
||||
None => zero_scalar_id,
|
||||
};
|
||||
|
||||
Cast::Binary(op, zero_id)
|
||||
}
|
||||
// casting from a bool - generate `OpSelect`
|
||||
(Sk::Bool, _, Some(dst_width)) => {
|
||||
let dst_scalar = crate::Scalar {
|
||||
kind,
|
||||
width: dst_width,
|
||||
};
|
||||
let zero_scalar_id =
|
||||
self.writer.get_constant_scalar_with(0, dst_scalar)?;
|
||||
let one_scalar_id =
|
||||
self.writer.get_constant_scalar_with(1, dst_scalar)?;
|
||||
let (accept_id, reject_id) = match src_size {
|
||||
Some(size) => {
|
||||
let ty = LocalType::Numeric(NumericType::Vector {
|
||||
size,
|
||||
scalar: dst_scalar,
|
||||
})
|
||||
.into();
|
||||
|
||||
self.temp_list.clear();
|
||||
self.temp_list.resize(size as _, zero_scalar_id);
|
||||
|
||||
let vec0_id =
|
||||
self.writer.get_constant_composite(ty, &self.temp_list);
|
||||
|
||||
self.temp_list.fill(one_scalar_id);
|
||||
|
||||
let vec1_id =
|
||||
self.writer.get_constant_composite(ty, &self.temp_list);
|
||||
|
||||
(vec1_id, vec0_id)
|
||||
}
|
||||
None => (one_scalar_id, zero_scalar_id),
|
||||
};
|
||||
|
||||
Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
|
||||
}
|
||||
(Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
|
||||
(Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
|
||||
(Sk::Float, Sk::Float, Some(dst_width))
|
||||
if src_scalar.width != dst_width =>
|
||||
{
|
||||
Cast::Unary(spirv::Op::FConvert)
|
||||
}
|
||||
(Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
|
||||
(Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::SConvert)
|
||||
}
|
||||
(Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
|
||||
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::UConvert)
|
||||
}
|
||||
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::SConvert)
|
||||
}
|
||||
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::UConvert)
|
||||
}
|
||||
// We assume it's either an identity cast, or int-uint.
|
||||
_ => Cast::Unary(spirv::Op::Bitcast),
|
||||
}
|
||||
};
|
||||
|
||||
let id = self.gen_id();
|
||||
let instruction = match cast {
|
||||
Cast::Identity => None,
|
||||
Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)),
|
||||
Cast::Binary(op, operand) => Some(Instruction::binary(
|
||||
op,
|
||||
result_type_id,
|
||||
id,
|
||||
expr_id,
|
||||
operand,
|
||||
)),
|
||||
Cast::Ternary(op, op1, op2) => Some(Instruction::ternary(
|
||||
op,
|
||||
result_type_id,
|
||||
id,
|
||||
expr_id,
|
||||
op1,
|
||||
op2,
|
||||
)),
|
||||
};
|
||||
if let Some(instruction) = instruction {
|
||||
block.body.push(instruction);
|
||||
id
|
||||
} else {
|
||||
expr_id
|
||||
}
|
||||
}
|
||||
} => self.write_as_expression(expr, convert, kind, block, result_type_id)?,
|
||||
crate::Expression::ImageLoad {
|
||||
image,
|
||||
coordinate,
|
||||
@@ -1925,6 +1774,237 @@ impl BlockContext<'_> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper which focuses on generating the `As` expressions and the various conversions
|
||||
/// that need to happen because of that.
|
||||
fn write_as_expression(
|
||||
&mut self,
|
||||
expr: Handle<crate::Expression>,
|
||||
convert: Option<u8>,
|
||||
kind: crate::ScalarKind,
|
||||
|
||||
block: &mut Block,
|
||||
result_type_id: u32,
|
||||
) -> Result<u32, Error> {
|
||||
use crate::ScalarKind as Sk;
|
||||
let expr_id = self.cached[expr];
|
||||
let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types);
|
||||
|
||||
// Matrix casts needs special treatment in SPIR-V, as the cast functions
|
||||
// can take vectors or scalars, but not matrices. In order to cast a matrix
|
||||
// we need to cast each column of the matrix individually and construct a new
|
||||
// matrix from the converted columns.
|
||||
if let crate::TypeInner::Matrix {
|
||||
columns,
|
||||
rows,
|
||||
scalar,
|
||||
} = *ty
|
||||
{
|
||||
let Some(convert) = convert else {
|
||||
// No conversion needs to be done, passes through.
|
||||
return Ok(expr_id);
|
||||
};
|
||||
|
||||
if convert == scalar.width {
|
||||
// No conversion needs to be done, passes through.
|
||||
return Ok(expr_id);
|
||||
}
|
||||
|
||||
if kind != Sk::Float {
|
||||
// Only float conversions are supported for matrices.
|
||||
return Err(Error::Validation("Matrices must be floats"));
|
||||
}
|
||||
|
||||
// Type of each extracted column
|
||||
let column_src_ty =
|
||||
self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
|
||||
size: rows,
|
||||
scalar,
|
||||
})));
|
||||
|
||||
// Type of the column after conversion
|
||||
let column_dst_ty =
|
||||
self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
|
||||
size: rows,
|
||||
scalar: crate::Scalar {
|
||||
kind,
|
||||
width: convert,
|
||||
},
|
||||
})));
|
||||
|
||||
let mut components = ArrayVec::<Word, 4>::new();
|
||||
|
||||
for column in 0..columns as usize {
|
||||
let column_id = self.gen_id();
|
||||
block.body.push(Instruction::composite_extract(
|
||||
column_src_ty,
|
||||
column_id,
|
||||
expr_id,
|
||||
&[column as u32],
|
||||
));
|
||||
|
||||
let column_conv_id = self.gen_id();
|
||||
block.body.push(Instruction::unary(
|
||||
spirv::Op::FConvert,
|
||||
column_dst_ty,
|
||||
column_conv_id,
|
||||
column_id,
|
||||
));
|
||||
|
||||
components.push(column_conv_id);
|
||||
}
|
||||
|
||||
let construct_id = self.gen_id();
|
||||
|
||||
block.body.push(Instruction::composite_construct(
|
||||
result_type_id,
|
||||
construct_id,
|
||||
&components,
|
||||
));
|
||||
|
||||
return Ok(construct_id);
|
||||
}
|
||||
|
||||
let (src_scalar, src_size) = match *ty {
|
||||
crate::TypeInner::Scalar(scalar) => (scalar, None),
|
||||
crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)),
|
||||
ref other => {
|
||||
log::error!("As source {:?}", other);
|
||||
return Err(Error::Validation("Unexpected Expression::As source"));
|
||||
}
|
||||
};
|
||||
|
||||
enum Cast {
|
||||
Identity,
|
||||
Unary(spirv::Op),
|
||||
Binary(spirv::Op, Word),
|
||||
Ternary(spirv::Op, Word, Word),
|
||||
}
|
||||
let cast = match (src_scalar.kind, kind, convert) {
|
||||
// Filter out identity casts. Some Adreno drivers are
|
||||
// confused by no-op OpBitCast instructions.
|
||||
(src_kind, kind, convert)
|
||||
if src_kind == kind
|
||||
&& convert.filter(|&width| width != src_scalar.width).is_none() =>
|
||||
{
|
||||
Cast::Identity
|
||||
}
|
||||
(Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
|
||||
(_, _, None) => Cast::Unary(spirv::Op::Bitcast),
|
||||
// casting to a bool - generate `OpXxxNotEqual`
|
||||
(_, Sk::Bool, Some(_)) => {
|
||||
let op = match src_scalar.kind {
|
||||
Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
|
||||
Sk::Float => spirv::Op::FUnordNotEqual,
|
||||
Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
|
||||
};
|
||||
let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?;
|
||||
let zero_id = match src_size {
|
||||
Some(size) => {
|
||||
let ty = LocalType::Numeric(NumericType::Vector {
|
||||
size,
|
||||
scalar: src_scalar,
|
||||
})
|
||||
.into();
|
||||
|
||||
self.temp_list.clear();
|
||||
self.temp_list.resize(size as _, zero_scalar_id);
|
||||
|
||||
self.writer.get_constant_composite(ty, &self.temp_list)
|
||||
}
|
||||
None => zero_scalar_id,
|
||||
};
|
||||
|
||||
Cast::Binary(op, zero_id)
|
||||
}
|
||||
// casting from a bool - generate `OpSelect`
|
||||
(Sk::Bool, _, Some(dst_width)) => {
|
||||
let dst_scalar = crate::Scalar {
|
||||
kind,
|
||||
width: dst_width,
|
||||
};
|
||||
let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?;
|
||||
let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?;
|
||||
let (accept_id, reject_id) = match src_size {
|
||||
Some(size) => {
|
||||
let ty = LocalType::Numeric(NumericType::Vector {
|
||||
size,
|
||||
scalar: dst_scalar,
|
||||
})
|
||||
.into();
|
||||
|
||||
self.temp_list.clear();
|
||||
self.temp_list.resize(size as _, zero_scalar_id);
|
||||
|
||||
let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list);
|
||||
|
||||
self.temp_list.fill(one_scalar_id);
|
||||
|
||||
let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list);
|
||||
|
||||
(vec1_id, vec0_id)
|
||||
}
|
||||
None => (one_scalar_id, zero_scalar_id),
|
||||
};
|
||||
|
||||
Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
|
||||
}
|
||||
(Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
|
||||
(Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
|
||||
(Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::FConvert)
|
||||
}
|
||||
(Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
|
||||
(Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::SConvert)
|
||||
}
|
||||
(Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
|
||||
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::UConvert)
|
||||
}
|
||||
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::SConvert)
|
||||
}
|
||||
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
|
||||
Cast::Unary(spirv::Op::UConvert)
|
||||
}
|
||||
// We assume it's either an identity cast, or int-uint.
|
||||
_ => Cast::Unary(spirv::Op::Bitcast),
|
||||
};
|
||||
Ok(match cast {
|
||||
Cast::Identity => expr_id,
|
||||
Cast::Unary(op) => {
|
||||
let id = self.gen_id();
|
||||
block
|
||||
.body
|
||||
.push(Instruction::unary(op, result_type_id, id, expr_id));
|
||||
id
|
||||
}
|
||||
Cast::Binary(op, operand) => {
|
||||
let id = self.gen_id();
|
||||
block.body.push(Instruction::binary(
|
||||
op,
|
||||
result_type_id,
|
||||
id,
|
||||
expr_id,
|
||||
operand,
|
||||
));
|
||||
id
|
||||
}
|
||||
Cast::Ternary(op, op1, op2) => {
|
||||
let id = self.gen_id();
|
||||
block.body.push(Instruction::ternary(
|
||||
op,
|
||||
result_type_id,
|
||||
id,
|
||||
expr_id,
|
||||
op1,
|
||||
op2,
|
||||
));
|
||||
id
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Build an `OpAccessChain` instruction.
|
||||
///
|
||||
/// Emit any needed bounds-checking expressions to `block`.
|
||||
|
||||
@@ -405,6 +405,10 @@ impl super::Instruction {
|
||||
instruction
|
||||
}
|
||||
|
||||
pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self {
|
||||
Self::constant(result_type_id, id, &[low])
|
||||
}
|
||||
|
||||
pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self {
|
||||
Self::constant(result_type_id, id, &[value])
|
||||
}
|
||||
|
||||
@@ -1154,6 +1154,15 @@ impl Writer {
|
||||
if bits == 64 {
|
||||
self.capabilities_used.insert(spirv::Capability::Float64);
|
||||
}
|
||||
if bits == 16 {
|
||||
self.capabilities_used.insert(spirv::Capability::Float16);
|
||||
self.capabilities_used
|
||||
.insert(spirv::Capability::StorageBuffer16BitAccess);
|
||||
self.capabilities_used
|
||||
.insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
|
||||
self.capabilities_used
|
||||
.insert(spirv::Capability::StorageInputOutput16);
|
||||
}
|
||||
Instruction::type_float(id, bits)
|
||||
}
|
||||
Sk::Bool => Instruction::type_bool(id),
|
||||
@@ -1222,6 +1231,19 @@ impl Writer {
|
||||
)?;
|
||||
self.use_extension("SPV_EXT_shader_atomic_float_add");
|
||||
}
|
||||
// 16 bit floating-point support requires Float16 capability
|
||||
crate::TypeInner::Matrix {
|
||||
scalar: crate::Scalar::F16,
|
||||
..
|
||||
}
|
||||
| crate::TypeInner::Vector {
|
||||
scalar: crate::Scalar::F16,
|
||||
..
|
||||
}
|
||||
| crate::TypeInner::Scalar(crate::Scalar::F16) => {
|
||||
self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?;
|
||||
self.use_extension("SPV_KHR_16bit_storage");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
@@ -1498,6 +1520,10 @@ impl Writer {
|
||||
Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32)
|
||||
}
|
||||
crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
|
||||
crate::Literal::F16(value) => {
|
||||
let low = value.to_bits();
|
||||
Instruction::constant_16bit(type_id, id, low as u32)
|
||||
}
|
||||
crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
|
||||
crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
|
||||
crate::Literal::U64(value) => {
|
||||
|
||||
@@ -5,6 +5,7 @@ use alloc::{
|
||||
vec::Vec,
|
||||
};
|
||||
use core::fmt::Write;
|
||||
use hashbrown::HashSet;
|
||||
|
||||
use super::Error;
|
||||
use super::ToWgslIfImplemented as _;
|
||||
@@ -129,6 +130,9 @@ impl<W: Write> Writer<W> {
|
||||
// Write all needed directives.
|
||||
self.write_enable_dual_source_blending_if_needed(module)?;
|
||||
|
||||
// Write all `enable` declarations
|
||||
self.write_enable_declarations(module)?;
|
||||
|
||||
// Write all structs
|
||||
for (handle, ty) in module.types.iter() {
|
||||
if let TypeInner::Struct { ref members, .. } = ty.inner {
|
||||
@@ -222,6 +226,41 @@ impl<W: Write> Writer<W> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper method which writes all the `enable` declarations
|
||||
/// needed for a module.
|
||||
fn write_enable_declarations(&mut self, module: &Module) -> BackendResult {
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
enum WrittenDeclarations {
|
||||
F16,
|
||||
}
|
||||
|
||||
let mut written_declarations = HashSet::new();
|
||||
|
||||
// Write all the `enable` declarations
|
||||
for (_, ty) in module.types.iter() {
|
||||
match ty.inner {
|
||||
TypeInner::Scalar(scalar)
|
||||
| TypeInner::Vector { scalar, .. }
|
||||
| TypeInner::Matrix { scalar, .. } => {
|
||||
if scalar == crate::Scalar::F16
|
||||
&& !written_declarations.contains(&WrittenDeclarations::F16)
|
||||
{
|
||||
writeln!(self.out, "enable f16;")?;
|
||||
written_declarations.insert(WrittenDeclarations::F16);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if !written_declarations.is_empty() {
|
||||
// Empty line for readability
|
||||
writeln!(self.out)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper method used to write
|
||||
/// [functions](https://gpuweb.github.io/gpuweb/wgsl/#functions)
|
||||
///
|
||||
@@ -1092,6 +1131,7 @@ impl<W: Write> Writer<W> {
|
||||
|
||||
match expressions[expr] {
|
||||
Expression::Literal(literal) => match literal {
|
||||
crate::Literal::F16(value) => write!(self.out, "{value}h")?,
|
||||
crate::Literal::F32(value) => write!(self.out, "{value}f")?,
|
||||
crate::Literal::U32(value) => write!(self.out, "{value}u")?,
|
||||
crate::Literal::I32(value) => {
|
||||
|
||||
@@ -270,6 +270,7 @@ impl TryToWgsl for crate::Scalar {
|
||||
Some(match self {
|
||||
Scalar::F64 => "f64",
|
||||
Scalar::F32 => "f32",
|
||||
Scalar::F16 => "f16",
|
||||
Scalar::I32 => "i32",
|
||||
Scalar::U32 => "u32",
|
||||
Scalar::I64 => "i64",
|
||||
|
||||
@@ -109,9 +109,15 @@ pub enum ErrorKind {
|
||||
/// Unsupported matrix of the form matCx2
|
||||
///
|
||||
/// Our IR expects matrices of the form matCx2 to have a stride of 8 however
|
||||
/// matrices in the std140 layout have a stride of at least 16
|
||||
#[error("unsupported matrix of the form matCx2 in std140 block layout")]
|
||||
UnsupportedMatrixTypeInStd140,
|
||||
/// matrices in the std140 layout have a stride of at least 16.
|
||||
#[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
|
||||
UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 },
|
||||
/// Unsupported matrix of the form f16matCxR
|
||||
///
|
||||
/// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count,
|
||||
/// however matrices in the std140 layout have a stride of at least 16.
|
||||
#[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
|
||||
UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 },
|
||||
/// A variable with the same name already exists in the current scope.
|
||||
#[error("Variable already declared: {0}")]
|
||||
VariableAlreadyDeclared(String),
|
||||
|
||||
@@ -122,11 +122,25 @@ pub fn calculate_offset(
|
||||
}
|
||||
|
||||
// See comment on the error kind
|
||||
if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi {
|
||||
errors.push(Error {
|
||||
kind: ErrorKind::UnsupportedMatrixTypeInStd140,
|
||||
meta,
|
||||
});
|
||||
if StructLayout::Std140 == layout {
|
||||
// Do the f16 test first, as it's more specific
|
||||
if scalar == Scalar::F16 {
|
||||
errors.push(Error {
|
||||
kind: ErrorKind::UnsupportedF16MatrixInStd140 {
|
||||
columns: columns as u8,
|
||||
rows: rows as u8,
|
||||
},
|
||||
meta,
|
||||
});
|
||||
}
|
||||
if rows == crate::VectorSize::Bi {
|
||||
errors.push(Error {
|
||||
kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 {
|
||||
columns: columns as u8,
|
||||
},
|
||||
meta,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
(align, align * columns as u32)
|
||||
|
||||
@@ -12,6 +12,10 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
|
||||
name: None,
|
||||
inner: TypeInner::Scalar(Scalar::BOOL),
|
||||
}),
|
||||
"float16_t" => Some(Type {
|
||||
name: None,
|
||||
inner: TypeInner::Scalar(Scalar::F16),
|
||||
}),
|
||||
"float" => Some(Type {
|
||||
name: None,
|
||||
inner: TypeInner::Scalar(Scalar::F32),
|
||||
@@ -42,6 +46,7 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
|
||||
"i" => Scalar::I32,
|
||||
"u" => Scalar::U32,
|
||||
"d" => Scalar::F64,
|
||||
"f16" => Scalar::F16,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ use alloc::{borrow::ToOwned, format, string::String, vec, vec::Vec};
|
||||
use core::{convert::TryInto, mem, num::NonZeroU32};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use half::f16;
|
||||
use petgraph::graphmap::GraphMap;
|
||||
|
||||
use super::atomic_upgrade::Upgrades;
|
||||
@@ -82,6 +83,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
|
||||
"SPV_KHR_vulkan_memory_model",
|
||||
"SPV_KHR_multiview",
|
||||
"SPV_EXT_shader_atomic_float_add",
|
||||
"SPV_KHR_16bit_storage",
|
||||
];
|
||||
pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
|
||||
|
||||
@@ -5604,6 +5606,9 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
|
||||
}) => {
|
||||
let low = self.next()?;
|
||||
match width {
|
||||
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal
|
||||
// If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word.
|
||||
2 => crate::Literal::F16(f16::from_bits(low as u16)),
|
||||
4 => crate::Literal::F32(f32::from_bits(low)),
|
||||
8 => {
|
||||
inst.expect(5)?;
|
||||
|
||||
@@ -153,8 +153,6 @@ pub enum NumberError {
|
||||
Invalid,
|
||||
#[error("numeric literal not representable by target type")]
|
||||
NotRepresentable,
|
||||
#[error("unimplemented f16 type")]
|
||||
UnimplementedF16,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||
@@ -1026,23 +1024,22 @@ impl<'a> Error<'a> {
|
||||
)],
|
||||
},
|
||||
Error::EnableExtensionNotEnabled { kind, span } => ParseError {
|
||||
message: format!("`{}` enable-extension is not enabled", kind.to_ident()),
|
||||
message: format!("the `{}` enable extension is not enabled", kind.to_ident()),
|
||||
labels: vec![(
|
||||
span,
|
||||
format!(
|
||||
concat!(
|
||||
"the `{}` enable-extension is needed for this functionality, ",
|
||||
"but it is not currently enabled"
|
||||
"the `{}` \"Enable Extension\" is needed for this functionality, ",
|
||||
"but it is not currently enabled."
|
||||
),
|
||||
kind.to_ident()
|
||||
)
|
||||
.into(),
|
||||
)],
|
||||
#[allow(irrefutable_let_patterns)]
|
||||
notes: if let EnableExtension::Unimplemented(kind) = kind {
|
||||
vec![format!(
|
||||
concat!(
|
||||
"This enable-extension is not yet implemented. ",
|
||||
"This \"Enable Extension\" is not yet implemented. ",
|
||||
"Let Naga maintainers know that you ran into this at ",
|
||||
"<https://github.com/gfx-rs/wgpu/issues/{}>, ",
|
||||
"so they can prioritize it!"
|
||||
@@ -1050,7 +1047,12 @@ impl<'a> Error<'a> {
|
||||
kind.tracking_issue_num()
|
||||
)]
|
||||
} else {
|
||||
vec![]
|
||||
vec![
|
||||
format!(
|
||||
"You can enable this extension by adding `enable {};` at the top of the shader, before any other items.",
|
||||
kind.to_ident()
|
||||
),
|
||||
]
|
||||
},
|
||||
},
|
||||
Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError {
|
||||
|
||||
@@ -2013,6 +2013,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
|
||||
let expr: Typed<crate::Expression> = match *expr {
|
||||
ast::Expression::Literal(literal) => {
|
||||
let literal = match literal {
|
||||
ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f),
|
||||
ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
|
||||
ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
|
||||
ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
use crate::front::wgsl::parse::directive::enable_extension::{
|
||||
EnableExtensions, ImplementedEnableExtension,
|
||||
};
|
||||
use crate::front::wgsl::{Error, Result, Scalar};
|
||||
use crate::Span;
|
||||
|
||||
@@ -113,10 +116,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<'_, crate::StorageFo
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_scalar_type(word: &str) -> Option<Scalar> {
|
||||
pub fn get_scalar_type(
|
||||
enable_extensions: &EnableExtensions,
|
||||
span: Span,
|
||||
word: &str,
|
||||
) -> Result<'static, Option<Scalar>> {
|
||||
use crate::ScalarKind as Sk;
|
||||
match word {
|
||||
// "f16" => Some(Scalar { kind: Sk::Float, width: 2 }),
|
||||
let scalar = match word {
|
||||
"f16" => Some(Scalar {
|
||||
kind: Sk::Float,
|
||||
width: 2,
|
||||
}),
|
||||
"f32" => Some(Scalar {
|
||||
kind: Sk::Float,
|
||||
width: 4,
|
||||
@@ -146,7 +156,18 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
|
||||
width: crate::BOOL_WIDTH,
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if matches!(scalar, Some(Scalar::F16))
|
||||
&& !enable_extensions.contains(ImplementedEnableExtension::F16)
|
||||
{
|
||||
return Err(Box::new(Error::EnableExtensionNotEnabled {
|
||||
span,
|
||||
kind: ImplementedEnableExtension::F16.into(),
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(scalar)
|
||||
}
|
||||
|
||||
pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> {
|
||||
|
||||
@@ -11,19 +11,23 @@ use alloc::boxed::Box;
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct EnableExtensions {
|
||||
dual_source_blending: bool,
|
||||
/// Whether `enable f16;` was written earlier in the shader module.
|
||||
f16: bool,
|
||||
}
|
||||
|
||||
impl EnableExtensions {
|
||||
pub(crate) const fn empty() -> Self {
|
||||
Self {
|
||||
f16: false,
|
||||
dual_source_blending: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an enable-extension to the set requested by a module.
|
||||
pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) {
|
||||
let field: &mut bool = match ext {
|
||||
let field = match ext {
|
||||
ImplementedEnableExtension::DualSourceBlending => &mut self.dual_source_blending,
|
||||
ImplementedEnableExtension::F16 => &mut self.f16,
|
||||
};
|
||||
*field = true;
|
||||
}
|
||||
@@ -32,6 +36,7 @@ impl EnableExtensions {
|
||||
pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool {
|
||||
match ext {
|
||||
ImplementedEnableExtension::DualSourceBlending => self.dual_source_blending,
|
||||
ImplementedEnableExtension::F16 => self.f16,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -47,7 +52,6 @@ impl Default for EnableExtensions {
|
||||
/// WGSL spec.: <https://www.w3.org/TR/WGSL/#enable-extensions-sec>
|
||||
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
|
||||
pub enum EnableExtension {
|
||||
#[allow(unused)]
|
||||
Implemented(ImplementedEnableExtension),
|
||||
Unimplemented(UnimplementedEnableExtension),
|
||||
}
|
||||
@@ -66,7 +70,7 @@ impl EnableExtension {
|
||||
/// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible.
|
||||
pub(crate) fn from_ident(word: &str, span: Span) -> Result<Self> {
|
||||
Ok(match word {
|
||||
Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16),
|
||||
Self::F16 => Self::Implemented(ImplementedEnableExtension::F16),
|
||||
Self::CLIP_DISTANCES => {
|
||||
Self::Unimplemented(UnimplementedEnableExtension::ClipDistances)
|
||||
}
|
||||
@@ -82,10 +86,9 @@ impl EnableExtension {
|
||||
match self {
|
||||
Self::Implemented(kind) => match kind {
|
||||
ImplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING,
|
||||
ImplementedEnableExtension::F16 => Self::F16,
|
||||
},
|
||||
|
||||
Self::Unimplemented(kind) => match kind {
|
||||
UnimplementedEnableExtension::F16 => Self::F16,
|
||||
UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES,
|
||||
},
|
||||
}
|
||||
@@ -101,17 +104,17 @@ pub enum ImplementedEnableExtension {
|
||||
///
|
||||
/// [`enable dual_source_blending;`]: https://www.w3.org/TR/WGSL/#extension-dual_source_blending
|
||||
DualSourceBlending,
|
||||
}
|
||||
|
||||
/// A variant of [`EnableExtension::Unimplemented`].
|
||||
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
|
||||
pub enum UnimplementedEnableExtension {
|
||||
/// Enables `f16`/`half` primitive support in all shader languages.
|
||||
///
|
||||
/// In the WGSL standard, this corresponds to [`enable f16;`].
|
||||
///
|
||||
/// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16
|
||||
F16,
|
||||
}
|
||||
|
||||
/// A variant of [`EnableExtension::Unimplemented`].
|
||||
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
|
||||
pub enum UnimplementedEnableExtension {
|
||||
/// Enables the `clip_distances` variable in WGSL.
|
||||
///
|
||||
/// In the WGSL standard, this corresponds to [`enable clip_distances;`].
|
||||
@@ -123,7 +126,6 @@ pub enum UnimplementedEnableExtension {
|
||||
impl UnimplementedEnableExtension {
|
||||
pub(crate) const fn tracking_issue_num(self) -> u16 {
|
||||
match self {
|
||||
Self::F16 => 4384,
|
||||
Self::ClipDistances => 6236,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,14 +413,17 @@ impl<'a> Lexer<'a> {
|
||||
/// Parses a generic scalar type, for example `<f32>`.
|
||||
pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> {
|
||||
self.expect_generic_paren('<')?;
|
||||
let pair = match self.next() {
|
||||
let (scalar, _span) = match self.next() {
|
||||
(Token::Word(word), span) => {
|
||||
conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
|
||||
conv::get_scalar_type(&self.enable_extensions, span, word)?
|
||||
.map(|scalar| (scalar, span))
|
||||
.ok_or(Error::UnknownScalarType(span))?
|
||||
}
|
||||
(_, span) => Err(Error::UnknownScalarType(span)),
|
||||
}?;
|
||||
(_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
|
||||
};
|
||||
|
||||
self.expect_generic_paren('>')?;
|
||||
Ok(pair)
|
||||
Ok(scalar)
|
||||
}
|
||||
|
||||
/// Parses a generic scalar type, for example `<f32>`.
|
||||
@@ -430,14 +433,18 @@ impl<'a> Lexer<'a> {
|
||||
&mut self,
|
||||
) -> Result<'a, (Scalar, Span)> {
|
||||
self.expect_generic_paren('<')?;
|
||||
let pair = match self.next() {
|
||||
(Token::Word(word), span) => conv::get_scalar_type(word)
|
||||
.map(|scalar| (scalar, span))
|
||||
.ok_or(Error::UnknownScalarType(span)),
|
||||
(_, span) => Err(Error::UnknownScalarType(span)),
|
||||
}?;
|
||||
|
||||
let (scalar, span) = match self.next() {
|
||||
(Token::Word(word), span) => {
|
||||
conv::get_scalar_type(&self.enable_extensions, span, word)?
|
||||
.map(|scalar| (scalar, span))
|
||||
.ok_or(Error::UnknownScalarType(span))?
|
||||
}
|
||||
(_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
|
||||
};
|
||||
|
||||
self.expect_generic_paren('>')?;
|
||||
Ok(pair)
|
||||
Ok((scalar, span))
|
||||
}
|
||||
|
||||
pub(in crate::front::wgsl) fn next_storage_access(
|
||||
@@ -518,6 +525,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) {
|
||||
|
||||
#[test]
|
||||
fn test_numbers() {
|
||||
use half::f16;
|
||||
// WGSL spec examples //
|
||||
|
||||
// decimal integer
|
||||
@@ -542,14 +550,16 @@ fn test_numbers() {
|
||||
Token::Number(Ok(Number::AbstractFloat(0.01))),
|
||||
Token::Number(Ok(Number::AbstractFloat(12.34))),
|
||||
Token::Number(Ok(Number::F32(0.))),
|
||||
Token::Number(Err(NumberError::UnimplementedF16)),
|
||||
Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
|
||||
Token::Number(Ok(Number::AbstractFloat(0.001))),
|
||||
Token::Number(Ok(Number::AbstractFloat(43.75))),
|
||||
Token::Number(Ok(Number::F32(16.))),
|
||||
Token::Number(Ok(Number::AbstractFloat(0.1875))),
|
||||
Token::Number(Err(NumberError::UnimplementedF16)),
|
||||
// https://github.com/gfx-rs/wgpu/issues/7046
|
||||
Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
|
||||
Token::Number(Ok(Number::AbstractFloat(0.12109375))),
|
||||
Token::Number(Err(NumberError::UnimplementedF16)),
|
||||
// https://github.com/gfx-rs/wgpu/issues/7046
|
||||
Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
|
||||
],
|
||||
);
|
||||
|
||||
|
||||
@@ -6,9 +6,7 @@ use crate::diagnostic_filter::{
|
||||
ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule,
|
||||
};
|
||||
use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken};
|
||||
use crate::front::wgsl::parse::directive::enable_extension::{
|
||||
EnableExtension, EnableExtensions, UnimplementedEnableExtension,
|
||||
};
|
||||
use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions};
|
||||
use crate::front::wgsl::parse::directive::language_extension::LanguageExtension;
|
||||
use crate::front::wgsl::parse::directive::DirectiveKind;
|
||||
use crate::front::wgsl::parse::lexer::{Lexer, Token};
|
||||
@@ -364,7 +362,7 @@ impl Parser {
|
||||
span: Span,
|
||||
ctx: &mut ExpressionContext<'a, '_, '_>,
|
||||
) -> Result<'a, Option<ast::ConstructorType<'a>>> {
|
||||
if let Some(scalar) = conv::get_scalar_type(word) {
|
||||
if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
|
||||
return Ok(Some(ast::ConstructorType::Scalar(scalar)));
|
||||
}
|
||||
|
||||
@@ -393,6 +391,13 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"vec2h" => {
|
||||
return Ok(Some(ast::ConstructorType::Vector {
|
||||
size: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"vec3" => ast::ConstructorType::PartialVector {
|
||||
size: crate::VectorSize::Tri,
|
||||
},
|
||||
@@ -417,6 +422,13 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"vec3h" => {
|
||||
return Ok(Some(ast::ConstructorType::Vector {
|
||||
size: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"vec4" => ast::ConstructorType::PartialVector {
|
||||
size: crate::VectorSize::Quad,
|
||||
},
|
||||
@@ -441,6 +453,13 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"vec4h" => {
|
||||
return Ok(Some(ast::ConstructorType::Vector {
|
||||
size: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat2x2" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Bi,
|
||||
@@ -453,6 +472,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat2x2h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat2x3" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Tri,
|
||||
@@ -465,6 +492,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat2x3h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat2x4" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Quad,
|
||||
@@ -477,6 +512,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat2x4h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat3x2" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Bi,
|
||||
@@ -489,6 +532,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat3x2h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat3x3" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Tri,
|
||||
@@ -501,6 +552,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat3x3h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat3x4" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Quad,
|
||||
@@ -513,6 +572,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat3x4h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat4x2" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Bi,
|
||||
@@ -525,6 +592,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat4x2h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat4x3" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Tri,
|
||||
@@ -537,6 +612,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat4x3h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat4x4" => ast::ConstructorType::PartialMatrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Quad,
|
||||
@@ -549,6 +632,14 @@ impl Parser {
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"mat4x4h" => {
|
||||
return Ok(Some(ast::ConstructorType::Matrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
}))
|
||||
}
|
||||
"array" => ast::ConstructorType::PartialArray,
|
||||
"atomic"
|
||||
| "binding_array"
|
||||
@@ -744,15 +835,17 @@ impl Parser {
|
||||
}
|
||||
(Token::Number(res), span) => {
|
||||
let _ = lexer.next();
|
||||
let num = res.map_err(|err| match err {
|
||||
super::error::NumberError::UnimplementedF16 => {
|
||||
Error::EnableExtensionNotEnabled {
|
||||
kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16),
|
||||
let num = res.map_err(|err| Error::BadNumber(span, err))?;
|
||||
|
||||
if let Some(enable_extension) = num.requires_enable_extension() {
|
||||
if !lexer.enable_extensions.contains(enable_extension) {
|
||||
return Err(Box::new(Error::EnableExtensionNotEnabled {
|
||||
kind: enable_extension.into(),
|
||||
span,
|
||||
}
|
||||
}));
|
||||
}
|
||||
err => Error::BadNumber(span, err),
|
||||
})?;
|
||||
}
|
||||
|
||||
ast::Expression::Literal(ast::Literal::Number(num))
|
||||
}
|
||||
(Token::Word("RAY_FLAG_NONE"), _) => {
|
||||
@@ -1340,9 +1433,10 @@ impl Parser {
|
||||
&mut self,
|
||||
lexer: &mut Lexer<'a>,
|
||||
word: &'a str,
|
||||
span: Span,
|
||||
ctx: &mut ExpressionContext<'a, '_, '_>,
|
||||
) -> Result<'a, Option<ast::Type<'a>>> {
|
||||
if let Some(scalar) = conv::get_scalar_type(word) {
|
||||
if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
|
||||
return Ok(Some(ast::Type::Scalar(scalar)));
|
||||
}
|
||||
|
||||
@@ -1370,6 +1464,11 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"vec2h" => ast::Type::Vector {
|
||||
size: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"vec3" => {
|
||||
let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
|
||||
ast::Type::Vector {
|
||||
@@ -1393,6 +1492,11 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"vec3h" => ast::Type::Vector {
|
||||
size: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"vec4" => {
|
||||
let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
|
||||
ast::Type::Vector {
|
||||
@@ -1416,6 +1520,11 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"vec4h" => ast::Type::Vector {
|
||||
size: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat2x2" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)?
|
||||
}
|
||||
@@ -1425,6 +1534,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat2x2h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat2x3" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)?
|
||||
}
|
||||
@@ -1434,6 +1549,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat2x3h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat2x4" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)?
|
||||
}
|
||||
@@ -1443,6 +1564,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat2x4h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Bi,
|
||||
rows: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat3x2" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)?
|
||||
}
|
||||
@@ -1452,6 +1579,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat3x2h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat3x3" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)?
|
||||
}
|
||||
@@ -1461,6 +1594,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat3x3h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat3x4" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)?
|
||||
}
|
||||
@@ -1470,6 +1609,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat3x4h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Tri,
|
||||
rows: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat4x2" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)?
|
||||
}
|
||||
@@ -1479,6 +1624,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat4x2h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Bi,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat4x3" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)?
|
||||
}
|
||||
@@ -1488,6 +1639,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat4x3h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Tri,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat4x4" => {
|
||||
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)?
|
||||
}
|
||||
@@ -1497,6 +1654,12 @@ impl Parser {
|
||||
ty: ctx.new_scalar(Scalar::F32),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"mat4x4h" => ast::Type::Matrix {
|
||||
columns: crate::VectorSize::Quad,
|
||||
rows: crate::VectorSize::Quad,
|
||||
ty: ctx.new_scalar(Scalar::F16),
|
||||
ty_span: Span::UNDEFINED,
|
||||
},
|
||||
"atomic" => {
|
||||
let scalar = lexer.next_scalar_generic()?;
|
||||
ast::Type::Atomic(scalar)
|
||||
@@ -1763,7 +1926,7 @@ impl Parser {
|
||||
|
||||
let (name, span) = lexer.next_ident_with_span()?;
|
||||
|
||||
let ty = match this.type_decl_impl(lexer, name, ctx)? {
|
||||
let ty = match this.type_decl_impl(lexer, name, span, ctx)? {
|
||||
Some(ty) => ty,
|
||||
None => {
|
||||
ctx.unresolved.insert(ast::Dependency {
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
use alloc::format;
|
||||
|
||||
use crate::front::wgsl::error::NumberError;
|
||||
use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension;
|
||||
use crate::front::wgsl::parse::lexer::Token;
|
||||
use half::f16;
|
||||
|
||||
/// When using this type assume no Abstract Int/Float for now
|
||||
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||
@@ -18,12 +20,23 @@ pub enum Number {
|
||||
I64(i64),
|
||||
/// Concrete u64
|
||||
U64(u64),
|
||||
/// Concrete f16
|
||||
F16(f16),
|
||||
/// Concrete f32
|
||||
F32(f32),
|
||||
/// Concrete f64
|
||||
F64(f64),
|
||||
}
|
||||
|
||||
impl Number {
|
||||
pub(super) const fn requires_enable_extension(&self) -> Option<ImplementedEnableExtension> {
|
||||
match *self {
|
||||
Number::F16(_) => Some(ImplementedEnableExtension::F16),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) {
|
||||
let (result, rest) = parse(input);
|
||||
(Token::Number(result), rest)
|
||||
@@ -369,7 +382,8 @@ fn parse_hex_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
|
||||
// can only be ParseHexfErrorKind::Inexact but we can't check since it's private
|
||||
_ => Err(NumberError::NotRepresentable),
|
||||
},
|
||||
Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
|
||||
// TODO: f16 is not supported by hexf_parse
|
||||
Some(FloatKind::F16) => Err(NumberError::NotRepresentable),
|
||||
Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) {
|
||||
Ok(num) => Ok(Number::F32(num)),
|
||||
// can only be ParseHexfErrorKind::Inexact but we can't check since it's private
|
||||
@@ -405,7 +419,12 @@ fn parse_dec_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
|
||||
.then_some(Number::F64(num))
|
||||
.ok_or(NumberError::NotRepresentable)
|
||||
}
|
||||
Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
|
||||
Some(FloatKind::F16) => {
|
||||
let num = input.parse::<f16>().unwrap(); // will never fail
|
||||
num.is_finite()
|
||||
.then_some(Number::F16(num))
|
||||
.ok_or(NumberError::NotRepresentable)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -225,6 +225,7 @@ use alloc::{string::String, vec::Vec};
|
||||
|
||||
#[cfg(feature = "arbitrary")]
|
||||
use arbitrary::Arbitrary;
|
||||
use half::f16;
|
||||
#[cfg(feature = "deserialize")]
|
||||
use serde::Deserialize;
|
||||
#[cfg(feature = "serialize")]
|
||||
@@ -818,6 +819,8 @@ pub enum Literal {
|
||||
F64(f64),
|
||||
/// May not be NaN or infinity.
|
||||
F32(f32),
|
||||
/// May not be NaN or infinity.
|
||||
F16(f16),
|
||||
U32(u32),
|
||||
I32(i32),
|
||||
U64(u64),
|
||||
|
||||
@@ -7,6 +7,8 @@ use alloc::{
|
||||
use core::iter;
|
||||
|
||||
use arrayvec::ArrayVec;
|
||||
use half::f16;
|
||||
use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero};
|
||||
|
||||
use crate::{
|
||||
arena::{Arena, Handle, HandleVec, UniqueArena},
|
||||
@@ -208,6 +210,7 @@ gen_component_wise_extractor! {
|
||||
literals: [
|
||||
AbstractFloat => AbstractFloat: f64,
|
||||
F32 => F32: f32,
|
||||
F16 => F16: f16,
|
||||
AbstractInt => AbstractInt: i64,
|
||||
U32 => U32: u32,
|
||||
I32 => I32: i32,
|
||||
@@ -228,6 +231,7 @@ gen_component_wise_extractor! {
|
||||
literals: [
|
||||
AbstractFloat => Abstract: f64,
|
||||
F32 => F32: f32,
|
||||
F16 => F16: f16,
|
||||
],
|
||||
scalar_kinds: [
|
||||
Float,
|
||||
@@ -253,6 +257,7 @@ gen_component_wise_extractor! {
|
||||
AbstractFloat => AbstractFloat: f64,
|
||||
AbstractInt => AbstractInt: i64,
|
||||
F32 => F32: f32,
|
||||
F16 => F16: f16,
|
||||
I32 => I32: i32,
|
||||
],
|
||||
scalar_kinds: [
|
||||
@@ -1114,6 +1119,7 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
component_wise_scalar(self, span, [arg], |args| match args {
|
||||
Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])),
|
||||
Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])),
|
||||
Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])),
|
||||
Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
|
||||
Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
|
||||
Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
|
||||
@@ -1145,9 +1151,13 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
}
|
||||
)
|
||||
}
|
||||
crate::MathFunction::Saturate => {
|
||||
component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) })
|
||||
}
|
||||
crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e {
|
||||
Float::F16([e]) => Ok(Float::F16(
|
||||
[e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))],
|
||||
)),
|
||||
Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])),
|
||||
Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])),
|
||||
}),
|
||||
|
||||
// trigonometry
|
||||
crate::MathFunction::Cos => {
|
||||
@@ -1201,7 +1211,35 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
component_wise_float!(self, span, [arg], |e| { Ok([e.floor()]) })
|
||||
}
|
||||
crate::MathFunction::Round => {
|
||||
component_wise_float!(self, span, [arg], |e| { Ok([e.round_ties_even()]) })
|
||||
component_wise_float(self, span, [arg], |e| match e {
|
||||
Float::Abstract([e]) => Ok(Float::Abstract([e.round_ties_even()])),
|
||||
Float::F32([e]) => Ok(Float::F32([e.round_ties_even()])),
|
||||
Float::F16([e]) => {
|
||||
// TODO: `round_ties_even` is not available on `half::f16` yet.
|
||||
//
|
||||
// This polyfill is shamelessly [~~stolen from~~ inspired by `ndarray-image`][polyfill source],
|
||||
// which has licensing compatible with ours. See also
|
||||
// <https://github.com/rust-lang/rust/issues/96710>.
|
||||
//
|
||||
// [polyfill source]: https://github.com/imeka/ndarray-ndimage/blob/8b14b4d6ecfbc96a8a052f802e342a7049c68d8f/src/lib.rs#L98
|
||||
fn round_ties_even(x: f64) -> f64 {
|
||||
let i = x as i64;
|
||||
let f = (x - i as f64).abs();
|
||||
if f == 0.5 {
|
||||
if i & 1 == 1 {
|
||||
// -1.5, 1.5, 3.5, ...
|
||||
(x.abs() + 0.5).copysign(x)
|
||||
} else {
|
||||
(x.abs() - 0.5).copysign(x)
|
||||
}
|
||||
} else {
|
||||
x.round()
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))]))
|
||||
}
|
||||
})
|
||||
}
|
||||
crate::MathFunction::Fract => {
|
||||
component_wise_float!(self, span, [arg], |e| {
|
||||
@@ -1246,15 +1284,27 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
)
|
||||
}
|
||||
crate::MathFunction::Step => {
|
||||
component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| {
|
||||
Ok([if edge <= x { 1.0 } else { 0.0 }])
|
||||
component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x {
|
||||
Float::Abstract([edge, x]) => {
|
||||
Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }]))
|
||||
}
|
||||
Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])),
|
||||
Float::F16([edge, x]) => Ok(Float::F16([if edge <= x {
|
||||
f16::one()
|
||||
} else {
|
||||
f16::zero()
|
||||
}])),
|
||||
})
|
||||
}
|
||||
crate::MathFunction::Sqrt => {
|
||||
component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) })
|
||||
}
|
||||
crate::MathFunction::InverseSqrt => {
|
||||
component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) })
|
||||
component_wise_float(self, span, [arg], |e| match e {
|
||||
Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])),
|
||||
Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])),
|
||||
Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])),
|
||||
})
|
||||
}
|
||||
|
||||
// bits
|
||||
@@ -1549,6 +1599,7 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::I32(v) => v,
|
||||
Literal::U32(v) => v as i32,
|
||||
Literal::F32(v) => v as i32,
|
||||
Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf
|
||||
Literal::Bool(v) => v as i32,
|
||||
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
|
||||
return make_error();
|
||||
@@ -1560,6 +1611,7 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::I32(v) => v as u32,
|
||||
Literal::U32(v) => v,
|
||||
Literal::F32(v) => v as u32,
|
||||
Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf
|
||||
Literal::Bool(v) => v as u32,
|
||||
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
|
||||
return make_error();
|
||||
@@ -1575,6 +1627,7 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::F64(v) => v as i64,
|
||||
Literal::I64(v) => v,
|
||||
Literal::U64(v) => v as i64,
|
||||
Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf
|
||||
Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
|
||||
Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
|
||||
}),
|
||||
@@ -1586,9 +1639,22 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::F64(v) => v as u64,
|
||||
Literal::I64(v) => v as u64,
|
||||
Literal::U64(v) => v,
|
||||
Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf
|
||||
Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
|
||||
Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
|
||||
}),
|
||||
Sc::F16 => Literal::F16(match literal {
|
||||
Literal::F16(v) => v,
|
||||
Literal::F32(v) => f16::from_f32(v),
|
||||
Literal::F64(v) => f16::from_f64(v),
|
||||
Literal::Bool(v) => f16::from_u32(v as u32).unwrap(),
|
||||
Literal::I64(v) => f16::from_i64(v).unwrap(),
|
||||
Literal::U64(v) => f16::from_u64(v).unwrap(),
|
||||
Literal::I32(v) => f16::from_i32(v).unwrap(),
|
||||
Literal::U32(v) => f16::from_u32(v).unwrap(),
|
||||
Literal::AbstractFloat(v) => f16::try_from_abstract(v)?,
|
||||
Literal::AbstractInt(v) => f16::try_from_abstract(v)?,
|
||||
}),
|
||||
Sc::F32 => Literal::F32(match literal {
|
||||
Literal::I32(v) => v as f32,
|
||||
Literal::U32(v) => v as f32,
|
||||
@@ -1597,12 +1663,14 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
|
||||
return make_error();
|
||||
}
|
||||
Literal::F16(v) => f16::to_f32(v),
|
||||
Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
|
||||
Literal::AbstractFloat(v) => f32::try_from_abstract(v)?,
|
||||
}),
|
||||
Sc::F64 => Literal::F64(match literal {
|
||||
Literal::I32(v) => v as f64,
|
||||
Literal::U32(v) => v as f64,
|
||||
Literal::F16(v) => f16::to_f64(v),
|
||||
Literal::F32(v) => v as f64,
|
||||
Literal::F64(v) => v,
|
||||
Literal::Bool(v) => v as u32 as f64,
|
||||
@@ -1614,6 +1682,7 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::I32(v) => v != 0,
|
||||
Literal::U32(v) => v != 0,
|
||||
Literal::F32(v) => v != 0.0,
|
||||
Literal::F16(v) => v != f16::zero(),
|
||||
Literal::Bool(v) => v,
|
||||
Literal::F64(_)
|
||||
| Literal::I64(_)
|
||||
@@ -1770,6 +1839,7 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
Literal::I32(v) => Literal::I32(v.wrapping_neg()),
|
||||
Literal::I64(v) => Literal::I64(v.wrapping_neg()),
|
||||
Literal::F32(v) => Literal::F32(-v),
|
||||
Literal::F16(v) => Literal::F16(-v),
|
||||
Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()),
|
||||
Literal::AbstractFloat(v) => Literal::AbstractFloat(-v),
|
||||
_ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg),
|
||||
@@ -1918,6 +1988,14 @@ impl<'a> ConstantEvaluator<'a> {
|
||||
_ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
|
||||
})
|
||||
}
|
||||
(Literal::F16(a), Literal::F16(b)) => Literal::F16(match op {
|
||||
BinaryOperator::Add => a + b,
|
||||
BinaryOperator::Subtract => a - b,
|
||||
BinaryOperator::Multiply => a * b,
|
||||
BinaryOperator::Divide => a / b,
|
||||
BinaryOperator::Modulo => a % b,
|
||||
_ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
|
||||
}),
|
||||
(Literal::AbstractInt(a), Literal::AbstractInt(b)) => {
|
||||
Literal::AbstractInt(match op {
|
||||
BinaryOperator::Add => a.checked_add(b).ok_or_else(|| {
|
||||
@@ -2522,6 +2600,32 @@ impl TryFromAbstract<f64> for u64 {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFromAbstract<f64> for f16 {
|
||||
fn try_from_abstract(value: f64) -> Result<f16, ConstantEvaluatorError> {
|
||||
let f = f16::from_f64(value);
|
||||
if f.is_infinite() {
|
||||
return Err(ConstantEvaluatorError::AutomaticConversionLossy {
|
||||
value: format!("{value:?}"),
|
||||
to_type: "f16",
|
||||
});
|
||||
}
|
||||
Ok(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFromAbstract<i64> for f16 {
|
||||
fn try_from_abstract(value: i64) -> Result<f16, ConstantEvaluatorError> {
|
||||
let f = f16::from_i64(value);
|
||||
if f.is_none() {
|
||||
return Err(ConstantEvaluatorError::AutomaticConversionLossy {
|
||||
value: format!("{value:?}"),
|
||||
to_type: "f16",
|
||||
});
|
||||
}
|
||||
Ok(f.unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use alloc::{vec, vec::Vec};
|
||||
|
||||
@@ -80,6 +80,7 @@ impl From<super::StorageFormat> for super::Scalar {
|
||||
pub enum HashableLiteral {
|
||||
F64(u64),
|
||||
F32(u32),
|
||||
F16(u16),
|
||||
U32(u32),
|
||||
I32(i32),
|
||||
U64(u64),
|
||||
@@ -94,6 +95,7 @@ impl From<crate::Literal> for HashableLiteral {
|
||||
match l {
|
||||
crate::Literal::F64(v) => Self::F64(v.to_bits()),
|
||||
crate::Literal::F32(v) => Self::F32(v.to_bits()),
|
||||
crate::Literal::F16(v) => Self::F16(v.to_bits()),
|
||||
crate::Literal::U32(v) => Self::U32(v),
|
||||
crate::Literal::I32(v) => Self::I32(v),
|
||||
crate::Literal::U64(v) => Self::U64(v),
|
||||
@@ -132,6 +134,7 @@ impl crate::Literal {
|
||||
match *self {
|
||||
Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
|
||||
Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
|
||||
Self::F16(_) => 2,
|
||||
Self::Bool(_) => crate::BOOL_WIDTH,
|
||||
Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
|
||||
}
|
||||
@@ -140,6 +143,7 @@ impl crate::Literal {
|
||||
match *self {
|
||||
Self::F64(_) => crate::Scalar::F64,
|
||||
Self::F32(_) => crate::Scalar::F32,
|
||||
Self::F16(_) => crate::Scalar::F16,
|
||||
Self::U32(_) => crate::Scalar::U32,
|
||||
Self::I32(_) => crate::Scalar::I32,
|
||||
Self::U64(_) => crate::Scalar::U64,
|
||||
|
||||
@@ -20,6 +20,10 @@ impl crate::ScalarKind {
|
||||
}
|
||||
|
||||
impl crate::Scalar {
|
||||
pub const F16: Self = Self {
|
||||
kind: crate::ScalarKind::Float,
|
||||
width: 2,
|
||||
};
|
||||
pub const I32: Self = Self {
|
||||
kind: crate::ScalarKind::Sint,
|
||||
width: 4,
|
||||
|
||||
@@ -1794,7 +1794,7 @@ impl super::Validator {
|
||||
}
|
||||
|
||||
pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> {
|
||||
self.check_width(literal.scalar())?;
|
||||
let _ = self.check_width(literal.scalar())?;
|
||||
check_literal_value(literal)?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -4,7 +4,7 @@ use bit_set::BitSet;
|
||||
|
||||
use super::{
|
||||
analyzer::{FunctionInfo, GlobalUse},
|
||||
Capabilities, Disalignment, FunctionError, ModuleInfo,
|
||||
Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError,
|
||||
};
|
||||
use crate::arena::{Handle, UniqueArena};
|
||||
use crate::span::{AddSpan as _, MapErrWithSpan as _, SpanProvider as _, WithSpan};
|
||||
@@ -41,6 +41,8 @@ pub enum GlobalVariableError {
|
||||
InitializerNotAllowed(crate::AddressSpace),
|
||||
#[error("Storage address space doesn't support write-only access")]
|
||||
StorageAddressSpaceWriteOnlyNotSupported,
|
||||
#[error("Type is not valid for use as a push constant")]
|
||||
InvalidPushConstantType(#[source] PushConstantError),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, thiserror::Error)]
|
||||
@@ -596,6 +598,9 @@ impl super::Validator {
|
||||
Capabilities::PUSH_CONSTANT,
|
||||
));
|
||||
}
|
||||
if let Err(ref err) = type_info.push_constant_compatibility {
|
||||
return Err(GlobalVariableError::InvalidPushConstantType(err.clone()));
|
||||
}
|
||||
(
|
||||
TypeFlags::DATA
|
||||
| TypeFlags::COPY
|
||||
|
||||
@@ -31,7 +31,7 @@ pub use expression::{check_literal_value, LiteralError};
|
||||
pub use expression::{ConstExpressionError, ExpressionError};
|
||||
pub use function::{CallError, FunctionError, LocalVariableError};
|
||||
pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
|
||||
pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
|
||||
pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError};
|
||||
|
||||
use self::handles::InvalidHandleError;
|
||||
|
||||
@@ -163,6 +163,8 @@ bitflags::bitflags! {
|
||||
const TEXTURE_INT64_ATOMIC = 1 << 24;
|
||||
/// Support for ray queries returning vertex position
|
||||
const RAY_HIT_VERTEX_POSITION = 1 << 25;
|
||||
/// Support for 16-bit floating-point types.
|
||||
const SHADER_FLOAT16 = 1 << 26;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -169,8 +169,16 @@ pub enum WidthError {
|
||||
Abstract,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, thiserror::Error)]
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
pub enum PushConstantError {
|
||||
#[error("The scalar type {0:?} is not supported in push constants")]
|
||||
InvalidScalar(crate::Scalar),
|
||||
}
|
||||
|
||||
// Only makes sense if `flags.contains(HOST_SHAREABLE)`
|
||||
type LayoutCompatibility = Result<Alignment, (Handle<crate::Type>, Disalignment)>;
|
||||
type PushConstantCompatibility = Result<(), PushConstantError>;
|
||||
|
||||
fn check_member_layout(
|
||||
accum: &mut LayoutCompatibility,
|
||||
@@ -221,6 +229,7 @@ pub(super) struct TypeInfo {
|
||||
pub flags: TypeFlags,
|
||||
pub uniform_layout: LayoutCompatibility,
|
||||
pub storage_layout: LayoutCompatibility,
|
||||
pub push_constant_compatibility: PushConstantCompatibility,
|
||||
}
|
||||
|
||||
impl TypeInfo {
|
||||
@@ -229,6 +238,7 @@ impl TypeInfo {
|
||||
flags: TypeFlags::empty(),
|
||||
uniform_layout: Ok(Alignment::ONE),
|
||||
storage_layout: Ok(Alignment::ONE),
|
||||
push_constant_compatibility: Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,6 +247,7 @@ impl TypeInfo {
|
||||
flags,
|
||||
uniform_layout: Ok(alignment),
|
||||
storage_layout: Ok(alignment),
|
||||
push_constant_compatibility: Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -250,11 +261,15 @@ impl super::Validator {
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> {
|
||||
pub(super) const fn check_width(
|
||||
&self,
|
||||
scalar: crate::Scalar,
|
||||
) -> Result<PushConstantCompatibility, WidthError> {
|
||||
let mut push_constant_compatibility = Ok(());
|
||||
let good = match scalar.kind {
|
||||
crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH,
|
||||
crate::ScalarKind::Float => {
|
||||
if scalar.width == 8 {
|
||||
crate::ScalarKind::Float => match scalar.width {
|
||||
8 => {
|
||||
if !self.capabilities.contains(Capabilities::FLOAT64) {
|
||||
return Err(WidthError::MissingCapability {
|
||||
name: "f64",
|
||||
@@ -262,10 +277,21 @@ impl super::Validator {
|
||||
});
|
||||
}
|
||||
true
|
||||
} else {
|
||||
scalar.width == 4
|
||||
}
|
||||
}
|
||||
2 => {
|
||||
if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) {
|
||||
return Err(WidthError::MissingCapability {
|
||||
name: "f16",
|
||||
flag: "FLOAT16",
|
||||
});
|
||||
}
|
||||
|
||||
push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar));
|
||||
|
||||
true
|
||||
}
|
||||
_ => scalar.width == 4,
|
||||
},
|
||||
crate::ScalarKind::Sint => {
|
||||
if scalar.width == 8 {
|
||||
if !self.capabilities.contains(Capabilities::SHADER_INT64) {
|
||||
@@ -297,7 +323,7 @@ impl super::Validator {
|
||||
}
|
||||
};
|
||||
if good {
|
||||
Ok(())
|
||||
Ok(push_constant_compatibility)
|
||||
} else {
|
||||
Err(WidthError::Invalid(scalar.kind, scalar.width))
|
||||
}
|
||||
@@ -317,13 +343,13 @@ impl super::Validator {
|
||||
use crate::TypeInner as Ti;
|
||||
Ok(match gctx.types[handle].inner {
|
||||
Ti::Scalar(scalar) => {
|
||||
self.check_width(scalar)?;
|
||||
let push_constant_compatibility = self.check_width(scalar)?;
|
||||
let shareable = if scalar.kind.is_numeric() {
|
||||
TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
|
||||
} else {
|
||||
TypeFlags::empty()
|
||||
};
|
||||
TypeInfo::new(
|
||||
let mut type_info = TypeInfo::new(
|
||||
TypeFlags::DATA
|
||||
| TypeFlags::SIZED
|
||||
| TypeFlags::COPY
|
||||
@@ -332,16 +358,18 @@ impl super::Validator {
|
||||
| TypeFlags::CREATION_RESOLVED
|
||||
| shareable,
|
||||
Alignment::from_width(scalar.width),
|
||||
)
|
||||
);
|
||||
type_info.push_constant_compatibility = push_constant_compatibility;
|
||||
type_info
|
||||
}
|
||||
Ti::Vector { size, scalar } => {
|
||||
self.check_width(scalar)?;
|
||||
let push_constant_compatibility = self.check_width(scalar)?;
|
||||
let shareable = if scalar.kind.is_numeric() {
|
||||
TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
|
||||
} else {
|
||||
TypeFlags::empty()
|
||||
};
|
||||
TypeInfo::new(
|
||||
let mut type_info = TypeInfo::new(
|
||||
TypeFlags::DATA
|
||||
| TypeFlags::SIZED
|
||||
| TypeFlags::COPY
|
||||
@@ -350,7 +378,9 @@ impl super::Validator {
|
||||
| TypeFlags::CREATION_RESOLVED
|
||||
| shareable,
|
||||
Alignment::from(size) * Alignment::from_width(scalar.width),
|
||||
)
|
||||
);
|
||||
type_info.push_constant_compatibility = push_constant_compatibility;
|
||||
type_info
|
||||
}
|
||||
Ti::Matrix {
|
||||
columns: _,
|
||||
@@ -360,8 +390,8 @@ impl super::Validator {
|
||||
if scalar.kind != crate::ScalarKind::Float {
|
||||
return Err(TypeError::MatrixElementNotFloat);
|
||||
}
|
||||
self.check_width(scalar)?;
|
||||
TypeInfo::new(
|
||||
let push_constant_compatibility = self.check_width(scalar)?;
|
||||
let mut type_info = TypeInfo::new(
|
||||
TypeFlags::DATA
|
||||
| TypeFlags::SIZED
|
||||
| TypeFlags::COPY
|
||||
@@ -370,7 +400,9 @@ impl super::Validator {
|
||||
| TypeFlags::CONSTRUCTIBLE
|
||||
| TypeFlags::CREATION_RESOLVED,
|
||||
Alignment::from(rows) * Alignment::from_width(scalar.width),
|
||||
)
|
||||
);
|
||||
type_info.push_constant_compatibility = push_constant_compatibility;
|
||||
type_info
|
||||
}
|
||||
Ti::Atomic(scalar) => {
|
||||
match scalar {
|
||||
@@ -465,7 +497,7 @@ impl super::Validator {
|
||||
// However, some cases are trivial: All our implicit base types
|
||||
// are DATA and SIZED, so we can never return
|
||||
// `InvalidPointerBase` or `InvalidPointerToUnsized`.
|
||||
self.check_width(scalar)?;
|
||||
let _ = self.check_width(scalar)?;
|
||||
|
||||
// `Validator::validate_function` actually checks the address
|
||||
// space of pointer arguments explicitly before checking the
|
||||
@@ -551,6 +583,7 @@ impl super::Validator {
|
||||
flags: base_info.flags & type_info_mask,
|
||||
uniform_layout,
|
||||
storage_layout,
|
||||
push_constant_compatibility: base_info.push_constant_compatibility.clone(),
|
||||
}
|
||||
}
|
||||
Ti::Struct { ref members, span } => {
|
||||
@@ -631,6 +664,10 @@ impl super::Validator {
|
||||
base_info.storage_layout,
|
||||
handle,
|
||||
);
|
||||
if base_info.push_constant_compatibility.is_err() {
|
||||
ti.push_constant_compatibility =
|
||||
base_info.push_constant_compatibility.clone();
|
||||
}
|
||||
|
||||
// Validate rule: If a structure member itself has a structure type S,
|
||||
// then the number of bytes between the start of that member and
|
||||
|
||||
57
naga/tests/in/glsl/f16-glsl.comp
Normal file
57
naga/tests/in/glsl/f16-glsl.comp
Normal file
@@ -0,0 +1,57 @@
|
||||
#version 460
|
||||
|
||||
#extension GL_AMD_gpu_shader_half_float: enable
|
||||
|
||||
layout(set = 0, binding = 0) uniform A {
|
||||
float16_t a_1;
|
||||
f16vec2 a_vec2;
|
||||
f16vec3 a_vec3;
|
||||
f16vec4 a_vec4;
|
||||
// So the rules here are particularly nasty for any f16 matries in uniform buffers
|
||||
// as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
|
||||
// buffer is over-aligned to what naga-ir wants.
|
||||
//
|
||||
// This is https://github.com/gfx-rs/wgpu/issues/4375.
|
||||
|
||||
// f16mat2 a_mat2;
|
||||
// f16mat2x3 a_mat2x3;
|
||||
// f16mat2x4 a_mat2x4;
|
||||
// f16mat3x2 a_mat3x2;
|
||||
// f16mat3 a_mat3;
|
||||
// f16mat3x4 a_mat3x4;
|
||||
// f16mat4x2 a_mat4x2;
|
||||
// f16mat4x3 a_mat4x3;
|
||||
// f16mat4 a_mat4;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) buffer B {
|
||||
float16_t b_1;
|
||||
f16vec2 b_vec2;
|
||||
f16vec3 b_vec3;
|
||||
f16vec4 b_vec4;
|
||||
f16mat2 b_mat2;
|
||||
f16mat2x3 b_mat2x3;
|
||||
f16mat2x4 b_mat2x4;
|
||||
f16mat3x2 b_mat3x2;
|
||||
f16mat3 b_mat3;
|
||||
f16mat3x4 b_mat3x4;
|
||||
f16mat4x2 b_mat4x2;
|
||||
f16mat4x3 b_mat4x3;
|
||||
f16mat4 b_mat4;
|
||||
};
|
||||
|
||||
void main() {
|
||||
b_1 = a_1;
|
||||
b_vec2 = a_vec2;
|
||||
b_vec3 = a_vec3;
|
||||
b_vec4 = a_vec4;
|
||||
// b_mat2 = a_mat2;
|
||||
// b_mat2x3 = a_mat2x3;
|
||||
// b_mat2x4 = a_mat2x4;
|
||||
// b_mat3x2 = a_mat3x2;
|
||||
// b_mat3 = a_mat3;
|
||||
// b_mat3x4 = a_mat3x4;
|
||||
// b_mat4x2 = a_mat4x2;
|
||||
// b_mat4x3 = a_mat4x3;
|
||||
// b_mat4 = a_mat4;
|
||||
}
|
||||
1
naga/tests/in/glsl/f16-glsl.toml
Normal file
1
naga/tests/in/glsl/f16-glsl.toml
Normal file
@@ -0,0 +1 @@
|
||||
god_mode = true
|
||||
57
naga/tests/in/spv/f16-spv.comp
Normal file
57
naga/tests/in/spv/f16-spv.comp
Normal file
@@ -0,0 +1,57 @@
|
||||
#version 460
|
||||
|
||||
#extension GL_AMD_gpu_shader_half_float: enable
|
||||
|
||||
layout(set = 0, binding = 0) uniform A {
|
||||
float16_t a_1;
|
||||
f16vec2 a_vec2;
|
||||
f16vec3 a_vec3;
|
||||
f16vec4 a_vec4;
|
||||
// So the rules here are particularly nasty for any f16 matries in uniform buffers
|
||||
// as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
|
||||
// buffer is over-aligned to what naga-ir wants.
|
||||
//
|
||||
// This is https://github.com/gfx-rs/wgpu/issues/4375.
|
||||
|
||||
// f16mat2 a_mat2;
|
||||
// f16mat2x3 a_mat2x3;
|
||||
// f16mat2x4 a_mat2x4;
|
||||
// f16mat3x2 a_mat3x2;
|
||||
// f16mat3 a_mat3;
|
||||
// f16mat3x4 a_mat3x4;
|
||||
// f16mat4x2 a_mat4x2;
|
||||
// f16mat4x3 a_mat4x3;
|
||||
// f16mat4 a_mat4;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) buffer B {
|
||||
float16_t b_1;
|
||||
f16vec2 b_vec2;
|
||||
f16vec3 b_vec3;
|
||||
f16vec4 b_vec4;
|
||||
f16mat2 b_mat2;
|
||||
f16mat2x3 b_mat2x3;
|
||||
f16mat2x4 b_mat2x4;
|
||||
f16mat3x2 b_mat3x2;
|
||||
f16mat3 b_mat3;
|
||||
f16mat3x4 b_mat3x4;
|
||||
f16mat4x2 b_mat4x2;
|
||||
f16mat4x3 b_mat4x3;
|
||||
f16mat4 b_mat4;
|
||||
};
|
||||
|
||||
void main() {
|
||||
b_1 = a_1;
|
||||
b_vec2 = a_vec2;
|
||||
b_vec3 = a_vec3;
|
||||
b_vec4 = a_vec4;
|
||||
// b_mat2 = a_mat2;
|
||||
// b_mat2x3 = a_mat2x3;
|
||||
// b_mat2x4 = a_mat2x4;
|
||||
// b_mat3x2 = a_mat3x2;
|
||||
// b_mat3 = a_mat3;
|
||||
// b_mat3x4 = a_mat3x4;
|
||||
// b_mat4x2 = a_mat4x2;
|
||||
// b_mat4x3 = a_mat4x3;
|
||||
// b_mat4 = a_mat4;
|
||||
}
|
||||
130
naga/tests/in/spv/f16-spv.spvasm
Normal file
130
naga/tests/in/spv/f16-spv.spvasm
Normal file
@@ -0,0 +1,130 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Google Shaderc over Glslang; 11
|
||||
; Bound: 46
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability StorageBuffer16BitAccess
|
||||
OpCapability UniformAndStorageBuffer16BitAccess
|
||||
OpExtension "SPV_KHR_16bit_storage"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %main "main"
|
||||
OpExecutionMode %main LocalSize 1 1 1
|
||||
OpSource GLSL 460
|
||||
OpSourceExtension "GL_AMD_gpu_shader_half_float"
|
||||
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
|
||||
OpSourceExtension "GL_GOOGLE_include_directive"
|
||||
OpName %main "main"
|
||||
OpName %B "B"
|
||||
OpMemberName %B 0 "b_1"
|
||||
OpMemberName %B 1 "b_vec2"
|
||||
OpMemberName %B 2 "b_vec3"
|
||||
OpMemberName %B 3 "b_vec4"
|
||||
OpMemberName %B 4 "b_mat2"
|
||||
OpMemberName %B 5 "b_mat2x3"
|
||||
OpMemberName %B 6 "b_mat2x4"
|
||||
OpMemberName %B 7 "b_mat3x2"
|
||||
OpMemberName %B 8 "b_mat3"
|
||||
OpMemberName %B 9 "b_mat3x4"
|
||||
OpMemberName %B 10 "b_mat4x2"
|
||||
OpMemberName %B 11 "b_mat4x3"
|
||||
OpMemberName %B 12 "b_mat4"
|
||||
OpName %_ ""
|
||||
OpName %A "A"
|
||||
OpMemberName %A 0 "a_1"
|
||||
OpMemberName %A 1 "a_vec2"
|
||||
OpMemberName %A 2 "a_vec3"
|
||||
OpMemberName %A 3 "a_vec4"
|
||||
OpName %__0 ""
|
||||
OpDecorate %B BufferBlock
|
||||
OpMemberDecorate %B 0 Offset 0
|
||||
OpMemberDecorate %B 1 Offset 4
|
||||
OpMemberDecorate %B 2 Offset 8
|
||||
OpMemberDecorate %B 3 Offset 16
|
||||
OpMemberDecorate %B 4 ColMajor
|
||||
OpMemberDecorate %B 4 MatrixStride 4
|
||||
OpMemberDecorate %B 4 Offset 24
|
||||
OpMemberDecorate %B 5 ColMajor
|
||||
OpMemberDecorate %B 5 MatrixStride 8
|
||||
OpMemberDecorate %B 5 Offset 32
|
||||
OpMemberDecorate %B 6 ColMajor
|
||||
OpMemberDecorate %B 6 MatrixStride 8
|
||||
OpMemberDecorate %B 6 Offset 48
|
||||
OpMemberDecorate %B 7 ColMajor
|
||||
OpMemberDecorate %B 7 MatrixStride 4
|
||||
OpMemberDecorate %B 7 Offset 64
|
||||
OpMemberDecorate %B 8 ColMajor
|
||||
OpMemberDecorate %B 8 MatrixStride 8
|
||||
OpMemberDecorate %B 8 Offset 80
|
||||
OpMemberDecorate %B 9 ColMajor
|
||||
OpMemberDecorate %B 9 MatrixStride 8
|
||||
OpMemberDecorate %B 9 Offset 104
|
||||
OpMemberDecorate %B 10 ColMajor
|
||||
OpMemberDecorate %B 10 MatrixStride 4
|
||||
OpMemberDecorate %B 10 Offset 128
|
||||
OpMemberDecorate %B 11 ColMajor
|
||||
OpMemberDecorate %B 11 MatrixStride 8
|
||||
OpMemberDecorate %B 11 Offset 144
|
||||
OpMemberDecorate %B 12 ColMajor
|
||||
OpMemberDecorate %B 12 MatrixStride 8
|
||||
OpMemberDecorate %B 12 Offset 176
|
||||
OpDecorate %_ Binding 1
|
||||
OpDecorate %_ DescriptorSet 0
|
||||
OpDecorate %A Block
|
||||
OpMemberDecorate %A 0 Offset 0
|
||||
OpMemberDecorate %A 1 Offset 4
|
||||
OpMemberDecorate %A 2 Offset 8
|
||||
OpMemberDecorate %A 3 Offset 16
|
||||
OpDecorate %__0 Binding 0
|
||||
OpDecorate %__0 DescriptorSet 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%half = OpTypeFloat 16
|
||||
%v2half = OpTypeVector %half 2
|
||||
%v3half = OpTypeVector %half 3
|
||||
%v4half = OpTypeVector %half 4
|
||||
%mat2v2half = OpTypeMatrix %v2half 2
|
||||
%mat2v3half = OpTypeMatrix %v3half 2
|
||||
%mat2v4half = OpTypeMatrix %v4half 2
|
||||
%mat3v2half = OpTypeMatrix %v2half 3
|
||||
%mat3v3half = OpTypeMatrix %v3half 3
|
||||
%mat3v4half = OpTypeMatrix %v4half 3
|
||||
%mat4v2half = OpTypeMatrix %v2half 4
|
||||
%mat4v3half = OpTypeMatrix %v3half 4
|
||||
%mat4v4half = OpTypeMatrix %v4half 4
|
||||
%B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half
|
||||
%_ptr_Uniform_B = OpTypePointer Uniform %B
|
||||
%_ = OpVariable %_ptr_Uniform_B Uniform
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%A = OpTypeStruct %half %v2half %v3half %v4half
|
||||
%_ptr_Uniform_A = OpTypePointer Uniform %A
|
||||
%__0 = OpVariable %_ptr_Uniform_A Uniform
|
||||
%_ptr_Uniform_half = OpTypePointer Uniform %half
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
|
||||
%int_2 = OpConstant %int 2
|
||||
%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
|
||||
%int_3 = OpConstant %int 3
|
||||
%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0
|
||||
%29 = OpLoad %half %28
|
||||
%30 = OpAccessChain %_ptr_Uniform_half %_ %int_0
|
||||
OpStore %30 %29
|
||||
%33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1
|
||||
%34 = OpLoad %v2half %33
|
||||
%35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1
|
||||
OpStore %35 %34
|
||||
%38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2
|
||||
%39 = OpLoad %v3half %38
|
||||
%40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2
|
||||
OpStore %40 %39
|
||||
%43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3
|
||||
%44 = OpLoad %v4half %43
|
||||
%45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3
|
||||
OpStore %45 %44
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
1
naga/tests/in/spv/f16-spv.toml
Normal file
1
naga/tests/in/spv/f16-spv.toml
Normal file
@@ -0,0 +1 @@
|
||||
god_mode = true
|
||||
@@ -1,6 +1,6 @@
|
||||
struct PushConstants {
|
||||
index: u32,
|
||||
double: vec2<f64>,
|
||||
double: vec2<f32>,
|
||||
}
|
||||
var<push_constant> pc: PushConstants;
|
||||
|
||||
|
||||
15
naga/tests/in/wgsl/f16.toml
Normal file
15
naga/tests/in/wgsl/f16.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
# No GLSL support for f16
|
||||
targets = "SPIRV | METAL | HLSL | WGSL"
|
||||
god_mode = true
|
||||
|
||||
[spv]
|
||||
version = [1, 0]
|
||||
|
||||
[hlsl]
|
||||
shader_model = "V6_2"
|
||||
special_constants_binding = { space = 1, register = 0 }
|
||||
push_constants_target = { space = 0, register = 0 }
|
||||
|
||||
lang_version = [1, 0]
|
||||
per_entry_point_map = {}
|
||||
zero_initialize_workgroup_memory = true
|
||||
127
naga/tests/in/wgsl/f16.wgsl
Normal file
127
naga/tests/in/wgsl/f16.wgsl
Normal file
@@ -0,0 +1,127 @@
|
||||
enable f16;
|
||||
|
||||
var<private> private_variable: f16 = 1h;
|
||||
const constant_variable: f16 = f16(15.2);
|
||||
|
||||
struct UniformCompatible {
|
||||
// Other types
|
||||
val_u32: u32,
|
||||
val_i32: i32,
|
||||
val_f32: f32,
|
||||
|
||||
// f16
|
||||
val_f16: f16,
|
||||
val_f16_2: vec2<f16>,
|
||||
val_f16_3: vec3<f16>,
|
||||
val_f16_4: vec4<f16>,
|
||||
final_value: f16,
|
||||
|
||||
val_mat2x2: mat2x2<f16>,
|
||||
val_mat2x3: mat2x3<f16>,
|
||||
val_mat2x4: mat2x4<f16>,
|
||||
val_mat3x2: mat3x2<f16>,
|
||||
val_mat3x3: mat3x3<f16>,
|
||||
val_mat3x4: mat3x4<f16>,
|
||||
val_mat4x2: mat4x2<f16>,
|
||||
val_mat4x3: mat4x3<f16>,
|
||||
val_mat4x4: mat4x4<f16>,
|
||||
}
|
||||
|
||||
struct StorageCompatible {
|
||||
val_f16_array_2: array<f16, 2>,
|
||||
}
|
||||
|
||||
struct LayoutTest {
|
||||
scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32
|
||||
}
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<uniform> input_uniform: UniformCompatible;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> input_storage: UniformCompatible;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage> input_arrays: StorageCompatible;
|
||||
|
||||
@group(0) @binding(3)
|
||||
var<storage, read_write> output: UniformCompatible;
|
||||
|
||||
@group(0) @binding(4)
|
||||
var<storage, read_write> output_arrays: StorageCompatible;
|
||||
|
||||
fn f16_function(x: f16) -> f16 {
|
||||
var val: f16 = f16(constant_variable);
|
||||
// A number too big for f16
|
||||
val += 1h - 33333h;
|
||||
// Constructing an f16 from an AbstractInt
|
||||
val += val + f16(5.);
|
||||
// Constructing a f16 from other types and other types from f16.
|
||||
val += f16(input_uniform.val_f32 + f32(val));
|
||||
// Constructing a vec3<i64> from a i64
|
||||
val += vec3<f16>(input_uniform.val_f16).z;
|
||||
|
||||
// Reading/writing to a uniform/storage buffer
|
||||
output.val_f16 = input_uniform.val_f16 + input_storage.val_f16;
|
||||
output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2;
|
||||
output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3;
|
||||
output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4;
|
||||
|
||||
output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2;
|
||||
output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3;
|
||||
output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4;
|
||||
output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2;
|
||||
output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3;
|
||||
output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4;
|
||||
output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2;
|
||||
output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3;
|
||||
output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4;
|
||||
|
||||
output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2;
|
||||
|
||||
// We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
|
||||
// constants based on the size of the type, and which are arguments.
|
||||
|
||||
// Numeric functions
|
||||
val += abs(val);
|
||||
val += clamp(val, val, val);
|
||||
val += dot(vec2(val), vec2(val));
|
||||
val += max(val, val);
|
||||
val += min(val, val);
|
||||
val += sign(val);
|
||||
|
||||
val += f16(1.0);
|
||||
|
||||
// We use the shorthand aliases here to ensure the aliases
|
||||
// work correctly.
|
||||
|
||||
// Cast vectors to/from f32
|
||||
let float_vec2 = vec2f(input_uniform.val_f16_2);
|
||||
output.val_f16_2 = vec2h(float_vec2);
|
||||
|
||||
let float_vec3 = vec3f(input_uniform.val_f16_3);
|
||||
output.val_f16_3 = vec3h(float_vec3);
|
||||
|
||||
let float_vec4 = vec4f(input_uniform.val_f16_4);
|
||||
output.val_f16_4 = vec4h(float_vec4);
|
||||
|
||||
// Cast matrices to/from f32
|
||||
output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2));
|
||||
output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3));
|
||||
output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4));
|
||||
output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2));
|
||||
output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3));
|
||||
output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4));
|
||||
output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2));
|
||||
output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3));
|
||||
output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4));
|
||||
|
||||
// Make sure all the variables are used.
|
||||
return val;
|
||||
}
|
||||
|
||||
@compute @workgroup_size(1)
|
||||
fn main() {
|
||||
output.final_value = f16_function(2h);
|
||||
}
|
||||
|
||||
351
naga/tests/out/hlsl/f16.hlsl
Normal file
351
naga/tests/out/hlsl/f16.hlsl
Normal file
@@ -0,0 +1,351 @@
|
||||
struct NagaConstants {
|
||||
int first_vertex;
|
||||
int first_instance;
|
||||
uint other;
|
||||
};
|
||||
ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
|
||||
|
||||
struct UniformCompatible {
|
||||
uint val_u32_;
|
||||
int val_i32_;
|
||||
float val_f32_;
|
||||
half val_f16_;
|
||||
half2 val_f16_2_;
|
||||
int _pad5_0;
|
||||
half3 val_f16_3_;
|
||||
half4 val_f16_4_;
|
||||
half final_value;
|
||||
half2 val_mat2x2__0; half2 val_mat2x2__1;
|
||||
int _pad9_0;
|
||||
row_major half2x3 val_mat2x3_;
|
||||
row_major half2x4 val_mat2x4_;
|
||||
half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2;
|
||||
int _pad12_0;
|
||||
row_major half3x3 val_mat3x3_;
|
||||
row_major half3x4 val_mat3x4_;
|
||||
half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3;
|
||||
row_major half4x3 val_mat4x3_;
|
||||
row_major half4x4 val_mat4x4_;
|
||||
};
|
||||
|
||||
struct StorageCompatible {
|
||||
half val_f16_array_2_[2];
|
||||
};
|
||||
|
||||
struct LayoutTest {
|
||||
half scalar1_;
|
||||
half scalar2_;
|
||||
int _pad2_0;
|
||||
half3 v3_;
|
||||
half tuck_in;
|
||||
half scalar4_;
|
||||
uint larger;
|
||||
};
|
||||
|
||||
static const half constant_variable = 15.203125h;
|
||||
|
||||
static half private_variable = 1.0h;
|
||||
cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
|
||||
ByteAddressBuffer input_storage : register(t1);
|
||||
ByteAddressBuffer input_arrays : register(t2);
|
||||
RWByteAddressBuffer output : register(u3);
|
||||
RWByteAddressBuffer output_arrays : register(u4);
|
||||
|
||||
half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) {
|
||||
return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1);
|
||||
}
|
||||
|
||||
void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) {
|
||||
obj.val_mat2x2__0 = mat[0];
|
||||
obj.val_mat2x2__1 = mat[1];
|
||||
}
|
||||
|
||||
void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
|
||||
switch(mat_idx) {
|
||||
case 0: { obj.val_mat2x2__0 = vec; break; }
|
||||
case 1: { obj.val_mat2x2__1 = vec; break; }
|
||||
}
|
||||
}
|
||||
|
||||
void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
|
||||
switch(mat_idx) {
|
||||
case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; }
|
||||
case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; }
|
||||
}
|
||||
}
|
||||
|
||||
half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) {
|
||||
return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2);
|
||||
}
|
||||
|
||||
void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) {
|
||||
obj.val_mat3x2__0 = mat[0];
|
||||
obj.val_mat3x2__1 = mat[1];
|
||||
obj.val_mat3x2__2 = mat[2];
|
||||
}
|
||||
|
||||
void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
|
||||
switch(mat_idx) {
|
||||
case 0: { obj.val_mat3x2__0 = vec; break; }
|
||||
case 1: { obj.val_mat3x2__1 = vec; break; }
|
||||
case 2: { obj.val_mat3x2__2 = vec; break; }
|
||||
}
|
||||
}
|
||||
|
||||
void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
|
||||
switch(mat_idx) {
|
||||
case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; }
|
||||
case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; }
|
||||
case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; }
|
||||
}
|
||||
}
|
||||
|
||||
half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) {
|
||||
return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3);
|
||||
}
|
||||
|
||||
void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) {
|
||||
obj.val_mat4x2__0 = mat[0];
|
||||
obj.val_mat4x2__1 = mat[1];
|
||||
obj.val_mat4x2__2 = mat[2];
|
||||
obj.val_mat4x2__3 = mat[3];
|
||||
}
|
||||
|
||||
void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
|
||||
switch(mat_idx) {
|
||||
case 0: { obj.val_mat4x2__0 = vec; break; }
|
||||
case 1: { obj.val_mat4x2__1 = vec; break; }
|
||||
case 2: { obj.val_mat4x2__2 = vec; break; }
|
||||
case 3: { obj.val_mat4x2__3 = vec; break; }
|
||||
}
|
||||
}
|
||||
|
||||
void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
|
||||
switch(mat_idx) {
|
||||
case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; }
|
||||
case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; }
|
||||
case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; }
|
||||
case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; }
|
||||
}
|
||||
}
|
||||
|
||||
typedef half ret_Constructarray2_half_[2];
|
||||
ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) {
|
||||
half ret[2] = { arg0, arg1 };
|
||||
return ret;
|
||||
}
|
||||
|
||||
half f16_function(half x)
|
||||
{
|
||||
half val = 15.203125h;
|
||||
|
||||
half _e4 = val;
|
||||
val = (_e4 + -33344.0h);
|
||||
half _e6 = val;
|
||||
half _e9 = val;
|
||||
val = (_e9 + (_e6 + 5.0h));
|
||||
float _e13 = input_uniform.val_f32_;
|
||||
half _e14 = val;
|
||||
half _e18 = val;
|
||||
val = (_e18 + half((_e13 + float(_e14))));
|
||||
half _e22 = input_uniform.val_f16_;
|
||||
half _e25 = val;
|
||||
val = (_e25 + (_e22).xxx.z);
|
||||
half _e31 = input_uniform.val_f16_;
|
||||
half _e34 = input_storage.Load<half>(12);
|
||||
output.Store(12, (_e31 + _e34));
|
||||
half2 _e40 = input_uniform.val_f16_2_;
|
||||
half2 _e43 = input_storage.Load<half2>(16);
|
||||
output.Store(16, (_e40 + _e43));
|
||||
half3 _e49 = input_uniform.val_f16_3_;
|
||||
half3 _e52 = input_storage.Load<half3>(24);
|
||||
output.Store(24, (_e49 + _e52));
|
||||
half4 _e58 = input_uniform.val_f16_4_;
|
||||
half4 _e61 = input_storage.Load<half4>(32);
|
||||
output.Store(32, (_e58 + _e61));
|
||||
half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
|
||||
half2x2 _e70 = half2x2(input_storage.Load<half2>(44+0), input_storage.Load<half2>(44+4));
|
||||
{
|
||||
half2x2 _value2 = (_e67 + _e70);
|
||||
output.Store(44+0, _value2[0]);
|
||||
output.Store(44+4, _value2[1]);
|
||||
}
|
||||
half2x3 _e76 = input_uniform.val_mat2x3_;
|
||||
half2x3 _e79 = half2x3(input_storage.Load<half3>(56+0), input_storage.Load<half3>(56+8));
|
||||
{
|
||||
half2x3 _value2 = (_e76 + _e79);
|
||||
output.Store(56+0, _value2[0]);
|
||||
output.Store(56+8, _value2[1]);
|
||||
}
|
||||
half2x4 _e85 = input_uniform.val_mat2x4_;
|
||||
half2x4 _e88 = half2x4(input_storage.Load<half4>(72+0), input_storage.Load<half4>(72+8));
|
||||
{
|
||||
half2x4 _value2 = (_e85 + _e88);
|
||||
output.Store(72+0, _value2[0]);
|
||||
output.Store(72+8, _value2[1]);
|
||||
}
|
||||
half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
|
||||
half3x2 _e97 = half3x2(input_storage.Load<half2>(88+0), input_storage.Load<half2>(88+4), input_storage.Load<half2>(88+8));
|
||||
{
|
||||
half3x2 _value2 = (_e94 + _e97);
|
||||
output.Store(88+0, _value2[0]);
|
||||
output.Store(88+4, _value2[1]);
|
||||
output.Store(88+8, _value2[2]);
|
||||
}
|
||||
half3x3 _e103 = input_uniform.val_mat3x3_;
|
||||
half3x3 _e106 = half3x3(input_storage.Load<half3>(104+0), input_storage.Load<half3>(104+8), input_storage.Load<half3>(104+16));
|
||||
{
|
||||
half3x3 _value2 = (_e103 + _e106);
|
||||
output.Store(104+0, _value2[0]);
|
||||
output.Store(104+8, _value2[1]);
|
||||
output.Store(104+16, _value2[2]);
|
||||
}
|
||||
half3x4 _e112 = input_uniform.val_mat3x4_;
|
||||
half3x4 _e115 = half3x4(input_storage.Load<half4>(128+0), input_storage.Load<half4>(128+8), input_storage.Load<half4>(128+16));
|
||||
{
|
||||
half3x4 _value2 = (_e112 + _e115);
|
||||
output.Store(128+0, _value2[0]);
|
||||
output.Store(128+8, _value2[1]);
|
||||
output.Store(128+16, _value2[2]);
|
||||
}
|
||||
half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
|
||||
half4x2 _e124 = half4x2(input_storage.Load<half2>(152+0), input_storage.Load<half2>(152+4), input_storage.Load<half2>(152+8), input_storage.Load<half2>(152+12));
|
||||
{
|
||||
half4x2 _value2 = (_e121 + _e124);
|
||||
output.Store(152+0, _value2[0]);
|
||||
output.Store(152+4, _value2[1]);
|
||||
output.Store(152+8, _value2[2]);
|
||||
output.Store(152+12, _value2[3]);
|
||||
}
|
||||
half4x3 _e130 = input_uniform.val_mat4x3_;
|
||||
half4x3 _e133 = half4x3(input_storage.Load<half3>(168+0), input_storage.Load<half3>(168+8), input_storage.Load<half3>(168+16), input_storage.Load<half3>(168+24));
|
||||
{
|
||||
half4x3 _value2 = (_e130 + _e133);
|
||||
output.Store(168+0, _value2[0]);
|
||||
output.Store(168+8, _value2[1]);
|
||||
output.Store(168+16, _value2[2]);
|
||||
output.Store(168+24, _value2[3]);
|
||||
}
|
||||
half4x4 _e139 = input_uniform.val_mat4x4_;
|
||||
half4x4 _e142 = half4x4(input_storage.Load<half4>(200+0), input_storage.Load<half4>(200+8), input_storage.Load<half4>(200+16), input_storage.Load<half4>(200+24));
|
||||
{
|
||||
half4x4 _value2 = (_e139 + _e142);
|
||||
output.Store(200+0, _value2[0]);
|
||||
output.Store(200+8, _value2[1]);
|
||||
output.Store(200+16, _value2[2]);
|
||||
output.Store(200+24, _value2[3]);
|
||||
}
|
||||
half _e148[2] = Constructarray2_half_(input_arrays.Load<half>(0+0), input_arrays.Load<half>(0+2));
|
||||
{
|
||||
half _value2[2] = _e148;
|
||||
output_arrays.Store(0+0, _value2[0]);
|
||||
output_arrays.Store(0+2, _value2[1]);
|
||||
}
|
||||
half _e149 = val;
|
||||
half _e151 = val;
|
||||
val = (_e151 + abs(_e149));
|
||||
half _e153 = val;
|
||||
half _e154 = val;
|
||||
half _e155 = val;
|
||||
half _e157 = val;
|
||||
val = (_e157 + clamp(_e153, _e154, _e155));
|
||||
half _e159 = val;
|
||||
half _e161 = val;
|
||||
half _e164 = val;
|
||||
val = (_e164 + dot((_e159).xx, (_e161).xx));
|
||||
half _e166 = val;
|
||||
half _e167 = val;
|
||||
half _e169 = val;
|
||||
val = (_e169 + max(_e166, _e167));
|
||||
half _e171 = val;
|
||||
half _e172 = val;
|
||||
half _e174 = val;
|
||||
val = (_e174 + min(_e171, _e172));
|
||||
half _e176 = val;
|
||||
half _e178 = val;
|
||||
val = (_e178 + sign(_e176));
|
||||
half _e181 = val;
|
||||
val = (_e181 + 1.0h);
|
||||
half2 _e185 = input_uniform.val_f16_2_;
|
||||
float2 float_vec2_ = float2(_e185);
|
||||
output.Store(16, half2(float_vec2_));
|
||||
half3 _e192 = input_uniform.val_f16_3_;
|
||||
float3 float_vec3_ = float3(_e192);
|
||||
output.Store(24, half3(float_vec3_));
|
||||
half4 _e199 = input_uniform.val_f16_4_;
|
||||
float4 float_vec4_ = float4(_e199);
|
||||
output.Store(32, half4(float_vec4_));
|
||||
half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
|
||||
{
|
||||
half2x2 _value2 = half2x2(float2x2(_e208));
|
||||
output.Store(44+0, _value2[0]);
|
||||
output.Store(44+4, _value2[1]);
|
||||
}
|
||||
half2x3 _e215 = input_uniform.val_mat2x3_;
|
||||
{
|
||||
half2x3 _value2 = half2x3(float2x3(_e215));
|
||||
output.Store(56+0, _value2[0]);
|
||||
output.Store(56+8, _value2[1]);
|
||||
}
|
||||
half2x4 _e222 = input_uniform.val_mat2x4_;
|
||||
{
|
||||
half2x4 _value2 = half2x4(float2x4(_e222));
|
||||
output.Store(72+0, _value2[0]);
|
||||
output.Store(72+8, _value2[1]);
|
||||
}
|
||||
half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
|
||||
{
|
||||
half3x2 _value2 = half3x2(float3x2(_e229));
|
||||
output.Store(88+0, _value2[0]);
|
||||
output.Store(88+4, _value2[1]);
|
||||
output.Store(88+8, _value2[2]);
|
||||
}
|
||||
half3x3 _e236 = input_uniform.val_mat3x3_;
|
||||
{
|
||||
half3x3 _value2 = half3x3(float3x3(_e236));
|
||||
output.Store(104+0, _value2[0]);
|
||||
output.Store(104+8, _value2[1]);
|
||||
output.Store(104+16, _value2[2]);
|
||||
}
|
||||
half3x4 _e243 = input_uniform.val_mat3x4_;
|
||||
{
|
||||
half3x4 _value2 = half3x4(float3x4(_e243));
|
||||
output.Store(128+0, _value2[0]);
|
||||
output.Store(128+8, _value2[1]);
|
||||
output.Store(128+16, _value2[2]);
|
||||
}
|
||||
half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
|
||||
{
|
||||
half4x2 _value2 = half4x2(float4x2(_e250));
|
||||
output.Store(152+0, _value2[0]);
|
||||
output.Store(152+4, _value2[1]);
|
||||
output.Store(152+8, _value2[2]);
|
||||
output.Store(152+12, _value2[3]);
|
||||
}
|
||||
half4x3 _e257 = input_uniform.val_mat4x3_;
|
||||
{
|
||||
half4x3 _value2 = half4x3(float4x3(_e257));
|
||||
output.Store(168+0, _value2[0]);
|
||||
output.Store(168+8, _value2[1]);
|
||||
output.Store(168+16, _value2[2]);
|
||||
output.Store(168+24, _value2[3]);
|
||||
}
|
||||
half4x4 _e264 = input_uniform.val_mat4x4_;
|
||||
{
|
||||
half4x4 _value2 = half4x4(float4x4(_e264));
|
||||
output.Store(200+0, _value2[0]);
|
||||
output.Store(200+8, _value2[1]);
|
||||
output.Store(200+16, _value2[2]);
|
||||
output.Store(200+24, _value2[3]);
|
||||
}
|
||||
half _e267 = val;
|
||||
return _e267;
|
||||
}
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main()
|
||||
{
|
||||
const half _e3 = f16_function(2.0h);
|
||||
output.Store(40, _e3);
|
||||
return;
|
||||
}
|
||||
12
naga/tests/out/hlsl/f16.ron
Normal file
12
naga/tests/out/hlsl/f16.ron
Normal file
@@ -0,0 +1,12 @@
|
||||
(
|
||||
vertex:[
|
||||
],
|
||||
fragment:[
|
||||
],
|
||||
compute:[
|
||||
(
|
||||
entry_point:"main",
|
||||
target_profile:"cs_6_2",
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -6,7 +6,7 @@ using metal::uint;
|
||||
|
||||
struct PushConstants {
|
||||
uint index;
|
||||
char _pad1[12];
|
||||
char _pad1[4];
|
||||
metal::float2 double_;
|
||||
};
|
||||
struct FragmentIn {
|
||||
|
||||
177
naga/tests/out/msl/f16.msl
Normal file
177
naga/tests/out/msl/f16.msl
Normal file
@@ -0,0 +1,177 @@
|
||||
// language: metal1.0
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using metal::uint;
|
||||
|
||||
struct UniformCompatible {
|
||||
uint val_u32_;
|
||||
int val_i32_;
|
||||
float val_f32_;
|
||||
half val_f16_;
|
||||
char _pad4[2];
|
||||
metal::half2 val_f16_2_;
|
||||
char _pad5[4];
|
||||
metal::half3 val_f16_3_;
|
||||
metal::half4 val_f16_4_;
|
||||
half final_value;
|
||||
char _pad8[2];
|
||||
metal::half2x2 val_mat2x2_;
|
||||
char _pad9[4];
|
||||
metal::half2x3 val_mat2x3_;
|
||||
metal::half2x4 val_mat2x4_;
|
||||
metal::half3x2 val_mat3x2_;
|
||||
char _pad12[4];
|
||||
metal::half3x3 val_mat3x3_;
|
||||
metal::half3x4 val_mat3x4_;
|
||||
metal::half4x2 val_mat4x2_;
|
||||
metal::half4x3 val_mat4x3_;
|
||||
metal::half4x4 val_mat4x4_;
|
||||
};
|
||||
struct type_16 {
|
||||
half inner[2];
|
||||
};
|
||||
struct StorageCompatible {
|
||||
type_16 val_f16_array_2_;
|
||||
};
|
||||
struct LayoutTest {
|
||||
half scalar1_;
|
||||
half scalar2_;
|
||||
char _pad2[4];
|
||||
metal::packed_half3 v3_;
|
||||
half tuck_in;
|
||||
half scalar4_;
|
||||
char _pad5[2];
|
||||
uint larger;
|
||||
};
|
||||
constant half constant_variable = 15.203125h;
|
||||
|
||||
half f16_function(
|
||||
half x,
|
||||
constant UniformCompatible& input_uniform,
|
||||
device UniformCompatible const& input_storage,
|
||||
device StorageCompatible const& input_arrays,
|
||||
device UniformCompatible& output,
|
||||
device StorageCompatible& output_arrays
|
||||
) {
|
||||
half val = 15.203125h;
|
||||
half _e4 = val;
|
||||
val = _e4 + -33344.0h;
|
||||
half _e6 = val;
|
||||
half _e9 = val;
|
||||
val = _e9 + (_e6 + 5.0h);
|
||||
float _e13 = input_uniform.val_f32_;
|
||||
half _e14 = val;
|
||||
half _e18 = val;
|
||||
val = _e18 + static_cast<half>(_e13 + static_cast<float>(_e14));
|
||||
half _e22 = input_uniform.val_f16_;
|
||||
half _e25 = val;
|
||||
val = _e25 + metal::half3(_e22).z;
|
||||
half _e31 = input_uniform.val_f16_;
|
||||
half _e34 = input_storage.val_f16_;
|
||||
output.val_f16_ = _e31 + _e34;
|
||||
metal::half2 _e40 = input_uniform.val_f16_2_;
|
||||
metal::half2 _e43 = input_storage.val_f16_2_;
|
||||
output.val_f16_2_ = _e40 + _e43;
|
||||
metal::half3 _e49 = input_uniform.val_f16_3_;
|
||||
metal::half3 _e52 = input_storage.val_f16_3_;
|
||||
output.val_f16_3_ = _e49 + _e52;
|
||||
metal::half4 _e58 = input_uniform.val_f16_4_;
|
||||
metal::half4 _e61 = input_storage.val_f16_4_;
|
||||
output.val_f16_4_ = _e58 + _e61;
|
||||
metal::half2x2 _e67 = input_uniform.val_mat2x2_;
|
||||
metal::half2x2 _e70 = input_storage.val_mat2x2_;
|
||||
output.val_mat2x2_ = _e67 + _e70;
|
||||
metal::half2x3 _e76 = input_uniform.val_mat2x3_;
|
||||
metal::half2x3 _e79 = input_storage.val_mat2x3_;
|
||||
output.val_mat2x3_ = _e76 + _e79;
|
||||
metal::half2x4 _e85 = input_uniform.val_mat2x4_;
|
||||
metal::half2x4 _e88 = input_storage.val_mat2x4_;
|
||||
output.val_mat2x4_ = _e85 + _e88;
|
||||
metal::half3x2 _e94 = input_uniform.val_mat3x2_;
|
||||
metal::half3x2 _e97 = input_storage.val_mat3x2_;
|
||||
output.val_mat3x2_ = _e94 + _e97;
|
||||
metal::half3x3 _e103 = input_uniform.val_mat3x3_;
|
||||
metal::half3x3 _e106 = input_storage.val_mat3x3_;
|
||||
output.val_mat3x3_ = _e103 + _e106;
|
||||
metal::half3x4 _e112 = input_uniform.val_mat3x4_;
|
||||
metal::half3x4 _e115 = input_storage.val_mat3x4_;
|
||||
output.val_mat3x4_ = _e112 + _e115;
|
||||
metal::half4x2 _e121 = input_uniform.val_mat4x2_;
|
||||
metal::half4x2 _e124 = input_storage.val_mat4x2_;
|
||||
output.val_mat4x2_ = _e121 + _e124;
|
||||
metal::half4x3 _e130 = input_uniform.val_mat4x3_;
|
||||
metal::half4x3 _e133 = input_storage.val_mat4x3_;
|
||||
output.val_mat4x3_ = _e130 + _e133;
|
||||
metal::half4x4 _e139 = input_uniform.val_mat4x4_;
|
||||
metal::half4x4 _e142 = input_storage.val_mat4x4_;
|
||||
output.val_mat4x4_ = _e139 + _e142;
|
||||
type_16 _e148 = input_arrays.val_f16_array_2_;
|
||||
output_arrays.val_f16_array_2_ = _e148;
|
||||
half _e149 = val;
|
||||
half _e151 = val;
|
||||
val = _e151 + metal::abs(_e149);
|
||||
half _e153 = val;
|
||||
half _e154 = val;
|
||||
half _e155 = val;
|
||||
half _e157 = val;
|
||||
val = _e157 + metal::clamp(_e153, _e154, _e155);
|
||||
half _e159 = val;
|
||||
half _e161 = val;
|
||||
half _e164 = val;
|
||||
val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161));
|
||||
half _e166 = val;
|
||||
half _e167 = val;
|
||||
half _e169 = val;
|
||||
val = _e169 + metal::max(_e166, _e167);
|
||||
half _e171 = val;
|
||||
half _e172 = val;
|
||||
half _e174 = val;
|
||||
val = _e174 + metal::min(_e171, _e172);
|
||||
half _e176 = val;
|
||||
half _e178 = val;
|
||||
val = _e178 + metal::sign(_e176);
|
||||
half _e181 = val;
|
||||
val = _e181 + 1.0h;
|
||||
metal::half2 _e185 = input_uniform.val_f16_2_;
|
||||
metal::float2 float_vec2_ = static_cast<metal::float2>(_e185);
|
||||
output.val_f16_2_ = static_cast<metal::half2>(float_vec2_);
|
||||
metal::half3 _e192 = input_uniform.val_f16_3_;
|
||||
metal::float3 float_vec3_ = static_cast<metal::float3>(_e192);
|
||||
output.val_f16_3_ = static_cast<metal::half3>(float_vec3_);
|
||||
metal::half4 _e199 = input_uniform.val_f16_4_;
|
||||
metal::float4 float_vec4_ = static_cast<metal::float4>(_e199);
|
||||
output.val_f16_4_ = static_cast<metal::half4>(float_vec4_);
|
||||
metal::half2x2 _e208 = input_uniform.val_mat2x2_;
|
||||
output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208));
|
||||
metal::half2x3 _e215 = input_uniform.val_mat2x3_;
|
||||
output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215));
|
||||
metal::half2x4 _e222 = input_uniform.val_mat2x4_;
|
||||
output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222));
|
||||
metal::half3x2 _e229 = input_uniform.val_mat3x2_;
|
||||
output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229));
|
||||
metal::half3x3 _e236 = input_uniform.val_mat3x3_;
|
||||
output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236));
|
||||
metal::half3x4 _e243 = input_uniform.val_mat3x4_;
|
||||
output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243));
|
||||
metal::half4x2 _e250 = input_uniform.val_mat4x2_;
|
||||
output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250));
|
||||
metal::half4x3 _e257 = input_uniform.val_mat4x3_;
|
||||
output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257));
|
||||
metal::half4x4 _e264 = input_uniform.val_mat4x4_;
|
||||
output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264));
|
||||
half _e267 = val;
|
||||
return _e267;
|
||||
}
|
||||
|
||||
kernel void main_(
|
||||
constant UniformCompatible& input_uniform [[user(fake0)]]
|
||||
, device UniformCompatible const& input_storage [[user(fake0)]]
|
||||
, device StorageCompatible const& input_arrays [[user(fake0)]]
|
||||
, device UniformCompatible& output [[user(fake0)]]
|
||||
, device StorageCompatible& output_arrays [[user(fake0)]]
|
||||
) {
|
||||
half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays);
|
||||
output.final_value = _e3;
|
||||
return;
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: rspirv
|
||||
; Bound: 175
|
||||
; Bound: 173
|
||||
OpCapability Shader
|
||||
OpCapability Int64Atomics
|
||||
OpCapability Int64
|
||||
@@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64"
|
||||
OpEntryPoint GLCompute %105 "test_atomic_compare_exchange_u64"
|
||||
OpEntryPoint GLCompute %104 "test_atomic_compare_exchange_u64"
|
||||
OpExecutionMode %19 LocalSize 1 1 1
|
||||
OpExecutionMode %105 LocalSize 1 1 1
|
||||
OpExecutionMode %104 LocalSize 1 1 1
|
||||
OpDecorate %5 ArrayStride 8
|
||||
OpDecorate %8 ArrayStride 8
|
||||
OpMemberDecorate %10 0 Offset 0
|
||||
@@ -63,12 +63,12 @@ OpMemberDecorate %16 0 Offset 0
|
||||
%68 = OpTypeInt 32 1
|
||||
%67 = OpConstant %68 1
|
||||
%69 = OpConstant %3 64
|
||||
%106 = OpTypePointer StorageBuffer %8
|
||||
%108 = OpConstant %7 10
|
||||
%111 = OpTypePointer Function %7
|
||||
%112 = OpConstantNull %7
|
||||
%114 = OpConstantNull %9
|
||||
%138 = OpTypePointer StorageBuffer %7
|
||||
%105 = OpTypePointer StorageBuffer %8
|
||||
%107 = OpConstant %7 10
|
||||
%110 = OpTypePointer Function %7
|
||||
%111 = OpConstantNull %7
|
||||
%113 = OpConstantNull %9
|
||||
%137 = OpTypePointer StorageBuffer %7
|
||||
%19 = OpFunction %2 None %20
|
||||
%18 = OpLabel
|
||||
%27 = OpVariable %28 Function %22
|
||||
@@ -142,16 +142,16 @@ OpBranch %89
|
||||
%89 = OpLabel
|
||||
%91 = OpLoad %4 %29
|
||||
%92 = OpIAdd %4 %91 %25
|
||||
%94 = OpLoad %3 %27
|
||||
%95 = OpLoad %4 %29
|
||||
%97 = OpAccessChain %64 %23 %94
|
||||
%98 = OpAtomicCompareExchange %4 %97 %67 %69 %69 %92 %95
|
||||
%99 = OpIEqual %9 %98 %95
|
||||
%96 = OpCompositeConstruct %10 %98 %99
|
||||
%100 = OpCompositeExtract %4 %96 0
|
||||
OpStore %29 %100
|
||||
%101 = OpCompositeExtract %9 %96 1
|
||||
OpStore %32 %101
|
||||
%93 = OpLoad %3 %27
|
||||
%94 = OpLoad %4 %29
|
||||
%96 = OpAccessChain %64 %23 %93
|
||||
%97 = OpAtomicCompareExchange %4 %96 %67 %69 %69 %92 %94
|
||||
%98 = OpIEqual %9 %97 %94
|
||||
%95 = OpCompositeConstruct %10 %97 %98
|
||||
%99 = OpCompositeExtract %4 %95 0
|
||||
OpStore %29 %99
|
||||
%100 = OpCompositeExtract %9 %95 1
|
||||
OpStore %32 %100
|
||||
OpBranch %90
|
||||
%90 = OpLabel
|
||||
OpBranch %73
|
||||
@@ -162,110 +162,110 @@ OpBranch %62
|
||||
%62 = OpLabel
|
||||
OpBranch %39
|
||||
%39 = OpLabel
|
||||
%102 = OpLoad %3 %27
|
||||
%103 = OpIAdd %3 %102 %26
|
||||
OpStore %27 %103
|
||||
%101 = OpLoad %3 %27
|
||||
%102 = OpIAdd %3 %101 %26
|
||||
OpStore %27 %102
|
||||
OpBranch %36
|
||||
%37 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
%105 = OpFunction %2 None %20
|
||||
%104 = OpLabel
|
||||
%109 = OpVariable %28 Function %22
|
||||
%110 = OpVariable %111 Function %112
|
||||
%113 = OpVariable %33 Function %114
|
||||
%120 = OpVariable %41 Function %43
|
||||
%145 = OpVariable %41 Function %43
|
||||
%107 = OpAccessChain %106 %15 %22
|
||||
%104 = OpFunction %2 None %20
|
||||
%103 = OpLabel
|
||||
%108 = OpVariable %28 Function %22
|
||||
%109 = OpVariable %110 Function %111
|
||||
%112 = OpVariable %33 Function %113
|
||||
%119 = OpVariable %41 Function %43
|
||||
%144 = OpVariable %41 Function %43
|
||||
%106 = OpAccessChain %105 %15 %22
|
||||
OpBranch %114
|
||||
%114 = OpLabel
|
||||
OpBranch %115
|
||||
%115 = OpLabel
|
||||
OpBranch %116
|
||||
%116 = OpLabel
|
||||
OpLoopMerge %117 %119 None
|
||||
OpBranch %121
|
||||
%121 = OpLabel
|
||||
%122 = OpLoad %40 %120
|
||||
%123 = OpIEqual %42 %45 %122
|
||||
%124 = OpAll %9 %123
|
||||
OpSelectionMerge %125 None
|
||||
OpBranchConditional %124 %117 %125
|
||||
%125 = OpLabel
|
||||
%126 = OpCompositeExtract %3 %122 1
|
||||
%127 = OpIEqual %9 %126 %44
|
||||
%128 = OpSelect %3 %127 %26 %22
|
||||
%129 = OpCompositeConstruct %40 %128 %26
|
||||
%130 = OpIAdd %40 %122 %129
|
||||
OpStore %120 %130
|
||||
OpBranch %118
|
||||
%118 = OpLabel
|
||||
%131 = OpLoad %3 %109
|
||||
%132 = OpULessThan %9 %131 %6
|
||||
OpSelectionMerge %133 None
|
||||
OpBranchConditional %132 %133 %134
|
||||
%134 = OpLabel
|
||||
OpLoopMerge %116 %118 None
|
||||
OpBranch %120
|
||||
%120 = OpLabel
|
||||
%121 = OpLoad %40 %119
|
||||
%122 = OpIEqual %42 %45 %121
|
||||
%123 = OpAll %9 %122
|
||||
OpSelectionMerge %124 None
|
||||
OpBranchConditional %123 %116 %124
|
||||
%124 = OpLabel
|
||||
%125 = OpCompositeExtract %3 %121 1
|
||||
%126 = OpIEqual %9 %125 %44
|
||||
%127 = OpSelect %3 %126 %26 %22
|
||||
%128 = OpCompositeConstruct %40 %127 %26
|
||||
%129 = OpIAdd %40 %121 %128
|
||||
OpStore %119 %129
|
||||
OpBranch %117
|
||||
%117 = OpLabel
|
||||
%130 = OpLoad %3 %108
|
||||
%131 = OpULessThan %9 %130 %6
|
||||
OpSelectionMerge %132 None
|
||||
OpBranchConditional %131 %132 %133
|
||||
%133 = OpLabel
|
||||
OpBranch %135
|
||||
%135 = OpLabel
|
||||
%137 = OpLoad %3 %109
|
||||
%139 = OpAccessChain %138 %107 %137
|
||||
%140 = OpAtomicLoad %7 %139 %67 %69
|
||||
OpStore %110 %140
|
||||
OpStore %113 %24
|
||||
OpBranch %141
|
||||
%141 = OpLabel
|
||||
OpLoopMerge %142 %144 None
|
||||
OpBranch %146
|
||||
%146 = OpLabel
|
||||
%147 = OpLoad %40 %145
|
||||
%148 = OpIEqual %42 %45 %147
|
||||
%149 = OpAll %9 %148
|
||||
OpSelectionMerge %150 None
|
||||
OpBranchConditional %149 %142 %150
|
||||
%150 = OpLabel
|
||||
%151 = OpCompositeExtract %3 %147 1
|
||||
%152 = OpIEqual %9 %151 %44
|
||||
%153 = OpSelect %3 %152 %26 %22
|
||||
%154 = OpCompositeConstruct %40 %153 %26
|
||||
%155 = OpIAdd %40 %147 %154
|
||||
OpStore %145 %155
|
||||
OpBranch %143
|
||||
%143 = OpLabel
|
||||
%156 = OpLoad %9 %113
|
||||
%157 = OpLogicalNot %9 %156
|
||||
OpSelectionMerge %158 None
|
||||
OpBranchConditional %157 %158 %159
|
||||
%159 = OpLabel
|
||||
OpBranch %116
|
||||
%132 = OpLabel
|
||||
OpBranch %134
|
||||
%134 = OpLabel
|
||||
%136 = OpLoad %3 %108
|
||||
%138 = OpAccessChain %137 %106 %136
|
||||
%139 = OpAtomicLoad %7 %138 %67 %69
|
||||
OpStore %109 %139
|
||||
OpStore %112 %24
|
||||
OpBranch %140
|
||||
%140 = OpLabel
|
||||
OpLoopMerge %141 %143 None
|
||||
OpBranch %145
|
||||
%145 = OpLabel
|
||||
%146 = OpLoad %40 %144
|
||||
%147 = OpIEqual %42 %45 %146
|
||||
%148 = OpAll %9 %147
|
||||
OpSelectionMerge %149 None
|
||||
OpBranchConditional %148 %141 %149
|
||||
%149 = OpLabel
|
||||
%150 = OpCompositeExtract %3 %146 1
|
||||
%151 = OpIEqual %9 %150 %44
|
||||
%152 = OpSelect %3 %151 %26 %22
|
||||
%153 = OpCompositeConstruct %40 %152 %26
|
||||
%154 = OpIAdd %40 %146 %153
|
||||
OpStore %144 %154
|
||||
OpBranch %142
|
||||
%142 = OpLabel
|
||||
%155 = OpLoad %9 %112
|
||||
%156 = OpLogicalNot %9 %155
|
||||
OpSelectionMerge %157 None
|
||||
OpBranchConditional %156 %157 %158
|
||||
%158 = OpLabel
|
||||
OpBranch %141
|
||||
%157 = OpLabel
|
||||
OpBranch %159
|
||||
%159 = OpLabel
|
||||
%161 = OpLoad %7 %109
|
||||
%162 = OpIAdd %7 %161 %107
|
||||
%163 = OpLoad %3 %108
|
||||
%164 = OpLoad %7 %109
|
||||
%166 = OpAccessChain %137 %106 %163
|
||||
%167 = OpAtomicCompareExchange %7 %166 %67 %69 %69 %162 %164
|
||||
%168 = OpIEqual %9 %167 %164
|
||||
%165 = OpCompositeConstruct %11 %167 %168
|
||||
%169 = OpCompositeExtract %7 %165 0
|
||||
OpStore %109 %169
|
||||
%170 = OpCompositeExtract %9 %165 1
|
||||
OpStore %112 %170
|
||||
OpBranch %160
|
||||
%160 = OpLabel
|
||||
%162 = OpLoad %7 %110
|
||||
%163 = OpIAdd %7 %162 %108
|
||||
%165 = OpLoad %3 %109
|
||||
%166 = OpLoad %7 %110
|
||||
%168 = OpAccessChain %138 %107 %165
|
||||
%169 = OpAtomicCompareExchange %7 %168 %67 %69 %69 %163 %166
|
||||
%170 = OpIEqual %9 %169 %166
|
||||
%167 = OpCompositeConstruct %11 %169 %170
|
||||
%171 = OpCompositeExtract %7 %167 0
|
||||
OpStore %110 %171
|
||||
%172 = OpCompositeExtract %9 %167 1
|
||||
OpStore %113 %172
|
||||
OpBranch %161
|
||||
%161 = OpLabel
|
||||
OpBranch %144
|
||||
%144 = OpLabel
|
||||
OpBranch %141
|
||||
%142 = OpLabel
|
||||
OpBranch %136
|
||||
%136 = OpLabel
|
||||
OpBranch %119
|
||||
%119 = OpLabel
|
||||
%173 = OpLoad %3 %109
|
||||
%174 = OpIAdd %3 %173 %26
|
||||
OpStore %109 %174
|
||||
OpBranch %116
|
||||
%117 = OpLabel
|
||||
OpBranch %143
|
||||
%143 = OpLabel
|
||||
OpBranch %140
|
||||
%141 = OpLabel
|
||||
OpBranch %135
|
||||
%135 = OpLabel
|
||||
OpBranch %118
|
||||
%118 = OpLabel
|
||||
%171 = OpLoad %3 %108
|
||||
%172 = OpIAdd %3 %171 %26
|
||||
OpStore %108 %172
|
||||
OpBranch %115
|
||||
%116 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -1,76 +1,74 @@
|
||||
; SPIR-V
|
||||
; Version: 1.2
|
||||
; Generator: rspirv
|
||||
; Bound: 48
|
||||
; Bound: 47
|
||||
OpCapability Shader
|
||||
OpCapability Float64
|
||||
OpCapability Geometry
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %23 "main" %15 %18 %21
|
||||
OpExecutionMode %23 OriginUpperLeft
|
||||
OpEntryPoint Fragment %22 "main" %14 %17 %20
|
||||
OpExecutionMode %22 OriginUpperLeft
|
||||
OpMemberDecorate %6 0 Offset 0
|
||||
OpMemberDecorate %6 1 Offset 16
|
||||
OpMemberDecorate %9 0 Offset 0
|
||||
OpMemberDecorate %9 1 Offset 16
|
||||
OpDecorate %11 Block
|
||||
OpMemberDecorate %11 0 Offset 0
|
||||
OpDecorate %15 Location 0
|
||||
OpDecorate %18 BuiltIn PrimitiveId
|
||||
OpDecorate %18 Flat
|
||||
OpDecorate %21 Location 0
|
||||
OpMemberDecorate %6 1 Offset 8
|
||||
OpMemberDecorate %8 0 Offset 0
|
||||
OpMemberDecorate %8 1 Offset 16
|
||||
OpDecorate %10 Block
|
||||
OpMemberDecorate %10 0 Offset 0
|
||||
OpDecorate %14 Location 0
|
||||
OpDecorate %17 BuiltIn PrimitiveId
|
||||
OpDecorate %17 Flat
|
||||
OpDecorate %20 Location 0
|
||||
%2 = OpTypeVoid
|
||||
%3 = OpTypeInt 32 0
|
||||
%5 = OpTypeFloat 64
|
||||
%5 = OpTypeFloat 32
|
||||
%4 = OpTypeVector %5 2
|
||||
%6 = OpTypeStruct %3 %4
|
||||
%8 = OpTypeFloat 32
|
||||
%7 = OpTypeVector %8 4
|
||||
%9 = OpTypeStruct %7 %3
|
||||
%11 = OpTypeStruct %6
|
||||
%12 = OpTypePointer PushConstant %11
|
||||
%10 = OpVariable %12 PushConstant
|
||||
%16 = OpTypePointer Input %7
|
||||
%15 = OpVariable %16 Input
|
||||
%19 = OpTypePointer Input %3
|
||||
%18 = OpVariable %19 Input
|
||||
%22 = OpTypePointer Output %7
|
||||
%21 = OpVariable %22 Output
|
||||
%24 = OpTypeFunction %2
|
||||
%25 = OpTypePointer PushConstant %6
|
||||
%26 = OpConstant %3 0
|
||||
%28 = OpConstant %8 1.0
|
||||
%29 = OpTypeVector %8 3
|
||||
%30 = OpConstantComposite %29 %28 %28 %28
|
||||
%33 = OpTypePointer PushConstant %3
|
||||
%36 = OpTypeBool
|
||||
%23 = OpFunction %2 None %24
|
||||
%13 = OpLabel
|
||||
%17 = OpLoad %7 %15
|
||||
%20 = OpLoad %3 %18
|
||||
%14 = OpCompositeConstruct %9 %17 %20
|
||||
%27 = OpAccessChain %25 %10 %26
|
||||
OpBranch %31
|
||||
%31 = OpLabel
|
||||
%32 = OpCompositeExtract %3 %14 1
|
||||
%34 = OpAccessChain %33 %27 %26
|
||||
%35 = OpLoad %3 %34
|
||||
%37 = OpIEqual %36 %32 %35
|
||||
OpSelectionMerge %38 None
|
||||
OpBranchConditional %37 %39 %40
|
||||
%39 = OpLabel
|
||||
%41 = OpCompositeExtract %7 %14 0
|
||||
OpStore %21 %41
|
||||
OpReturn
|
||||
%40 = OpLabel
|
||||
%42 = OpCompositeExtract %7 %14 0
|
||||
%43 = OpVectorShuffle %29 %42 %42 0 1 2
|
||||
%44 = OpFSub %29 %30 %43
|
||||
%45 = OpCompositeExtract %7 %14 0
|
||||
%46 = OpCompositeExtract %8 %45 3
|
||||
%47 = OpCompositeConstruct %7 %44 %46
|
||||
OpStore %21 %47
|
||||
OpReturn
|
||||
%7 = OpTypeVector %5 4
|
||||
%8 = OpTypeStruct %7 %3
|
||||
%10 = OpTypeStruct %6
|
||||
%11 = OpTypePointer PushConstant %10
|
||||
%9 = OpVariable %11 PushConstant
|
||||
%15 = OpTypePointer Input %7
|
||||
%14 = OpVariable %15 Input
|
||||
%18 = OpTypePointer Input %3
|
||||
%17 = OpVariable %18 Input
|
||||
%21 = OpTypePointer Output %7
|
||||
%20 = OpVariable %21 Output
|
||||
%23 = OpTypeFunction %2
|
||||
%24 = OpTypePointer PushConstant %6
|
||||
%25 = OpConstant %3 0
|
||||
%27 = OpConstant %5 1.0
|
||||
%28 = OpTypeVector %5 3
|
||||
%29 = OpConstantComposite %28 %27 %27 %27
|
||||
%32 = OpTypePointer PushConstant %3
|
||||
%35 = OpTypeBool
|
||||
%22 = OpFunction %2 None %23
|
||||
%12 = OpLabel
|
||||
%16 = OpLoad %7 %14
|
||||
%19 = OpLoad %3 %17
|
||||
%13 = OpCompositeConstruct %8 %16 %19
|
||||
%26 = OpAccessChain %24 %9 %25
|
||||
OpBranch %30
|
||||
%30 = OpLabel
|
||||
%31 = OpCompositeExtract %3 %13 1
|
||||
%33 = OpAccessChain %32 %26 %25
|
||||
%34 = OpLoad %3 %33
|
||||
%36 = OpIEqual %35 %31 %34
|
||||
OpSelectionMerge %37 None
|
||||
OpBranchConditional %36 %38 %39
|
||||
%38 = OpLabel
|
||||
%40 = OpCompositeExtract %7 %13 0
|
||||
OpStore %20 %40
|
||||
OpReturn
|
||||
%39 = OpLabel
|
||||
%41 = OpCompositeExtract %7 %13 0
|
||||
%42 = OpVectorShuffle %28 %41 %41 0 1 2
|
||||
%43 = OpFSub %28 %29 %42
|
||||
%44 = OpCompositeExtract %7 %13 0
|
||||
%45 = OpCompositeExtract %5 %44 3
|
||||
%46 = OpCompositeConstruct %7 %43 %45
|
||||
OpStore %20 %46
|
||||
OpReturn
|
||||
%37 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
633
naga/tests/out/spv/f16.spvasm
Normal file
633
naga/tests/out/spv/f16.spvasm
Normal file
@@ -0,0 +1,633 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: rspirv
|
||||
; Bound: 511
|
||||
OpCapability Shader
|
||||
OpCapability Float16
|
||||
OpCapability StorageBuffer16BitAccess
|
||||
OpCapability UniformAndStorageBuffer16BitAccess
|
||||
OpCapability StorageInputOutput16
|
||||
OpExtension "SPV_KHR_storage_buffer_storage_class"
|
||||
OpExtension "SPV_KHR_16bit_storage"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %499 "main"
|
||||
OpExecutionMode %499 LocalSize 1 1 1
|
||||
OpMemberDecorate %19 0 Offset 0
|
||||
OpMemberDecorate %19 1 Offset 4
|
||||
OpMemberDecorate %19 2 Offset 8
|
||||
OpMemberDecorate %19 3 Offset 12
|
||||
OpMemberDecorate %19 4 Offset 16
|
||||
OpMemberDecorate %19 5 Offset 24
|
||||
OpMemberDecorate %19 6 Offset 32
|
||||
OpMemberDecorate %19 7 Offset 40
|
||||
OpMemberDecorate %19 8 Offset 44
|
||||
OpMemberDecorate %19 8 ColMajor
|
||||
OpMemberDecorate %19 8 MatrixStride 4
|
||||
OpMemberDecorate %19 9 Offset 56
|
||||
OpMemberDecorate %19 9 ColMajor
|
||||
OpMemberDecorate %19 9 MatrixStride 8
|
||||
OpMemberDecorate %19 10 Offset 72
|
||||
OpMemberDecorate %19 10 ColMajor
|
||||
OpMemberDecorate %19 10 MatrixStride 8
|
||||
OpMemberDecorate %19 11 Offset 88
|
||||
OpMemberDecorate %19 11 ColMajor
|
||||
OpMemberDecorate %19 11 MatrixStride 4
|
||||
OpMemberDecorate %19 12 Offset 104
|
||||
OpMemberDecorate %19 12 ColMajor
|
||||
OpMemberDecorate %19 12 MatrixStride 8
|
||||
OpMemberDecorate %19 13 Offset 128
|
||||
OpMemberDecorate %19 13 ColMajor
|
||||
OpMemberDecorate %19 13 MatrixStride 8
|
||||
OpMemberDecorate %19 14 Offset 152
|
||||
OpMemberDecorate %19 14 ColMajor
|
||||
OpMemberDecorate %19 14 MatrixStride 4
|
||||
OpMemberDecorate %19 15 Offset 168
|
||||
OpMemberDecorate %19 15 ColMajor
|
||||
OpMemberDecorate %19 15 MatrixStride 8
|
||||
OpMemberDecorate %19 16 Offset 200
|
||||
OpMemberDecorate %19 16 ColMajor
|
||||
OpMemberDecorate %19 16 MatrixStride 8
|
||||
OpDecorate %20 ArrayStride 2
|
||||
OpMemberDecorate %22 0 Offset 0
|
||||
OpMemberDecorate %23 0 Offset 0
|
||||
OpMemberDecorate %23 1 Offset 2
|
||||
OpMemberDecorate %23 2 Offset 8
|
||||
OpMemberDecorate %23 3 Offset 14
|
||||
OpMemberDecorate %23 4 Offset 16
|
||||
OpMemberDecorate %23 5 Offset 20
|
||||
OpDecorate %28 DescriptorSet 0
|
||||
OpDecorate %28 Binding 0
|
||||
OpDecorate %29 Block
|
||||
OpMemberDecorate %29 0 Offset 0
|
||||
OpDecorate %31 NonWritable
|
||||
OpDecorate %31 DescriptorSet 0
|
||||
OpDecorate %31 Binding 1
|
||||
OpDecorate %32 Block
|
||||
OpMemberDecorate %32 0 Offset 0
|
||||
OpDecorate %34 NonWritable
|
||||
OpDecorate %34 DescriptorSet 0
|
||||
OpDecorate %34 Binding 2
|
||||
OpDecorate %35 Block
|
||||
OpMemberDecorate %35 0 Offset 0
|
||||
OpDecorate %37 DescriptorSet 0
|
||||
OpDecorate %37 Binding 3
|
||||
OpDecorate %38 Block
|
||||
OpMemberDecorate %38 0 Offset 0
|
||||
OpDecorate %40 DescriptorSet 0
|
||||
OpDecorate %40 Binding 4
|
||||
OpDecorate %41 Block
|
||||
OpMemberDecorate %41 0 Offset 0
|
||||
%2 = OpTypeVoid
|
||||
%3 = OpTypeFloat 16
|
||||
%4 = OpTypeInt 32 0
|
||||
%5 = OpTypeInt 32 1
|
||||
%6 = OpTypeFloat 32
|
||||
%7 = OpTypeVector %3 2
|
||||
%8 = OpTypeVector %3 3
|
||||
%9 = OpTypeVector %3 4
|
||||
%10 = OpTypeMatrix %7 2
|
||||
%11 = OpTypeMatrix %8 2
|
||||
%12 = OpTypeMatrix %9 2
|
||||
%13 = OpTypeMatrix %7 3
|
||||
%14 = OpTypeMatrix %8 3
|
||||
%15 = OpTypeMatrix %9 3
|
||||
%16 = OpTypeMatrix %7 4
|
||||
%17 = OpTypeMatrix %8 4
|
||||
%18 = OpTypeMatrix %9 4
|
||||
%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18
|
||||
%21 = OpConstant %4 2
|
||||
%20 = OpTypeArray %3 %21
|
||||
%22 = OpTypeStruct %20
|
||||
%23 = OpTypeStruct %3 %3 %8 %3 %3 %4
|
||||
%24 = OpConstant %3 2.1524e-41
|
||||
%25 = OpConstant %3 2.7121e-41
|
||||
%27 = OpTypePointer Private %3
|
||||
%26 = OpVariable %27 Private %24
|
||||
%29 = OpTypeStruct %19
|
||||
%30 = OpTypePointer Uniform %29
|
||||
%28 = OpVariable %30 Uniform
|
||||
%32 = OpTypeStruct %19
|
||||
%33 = OpTypePointer StorageBuffer %32
|
||||
%31 = OpVariable %33 StorageBuffer
|
||||
%35 = OpTypeStruct %22
|
||||
%36 = OpTypePointer StorageBuffer %35
|
||||
%34 = OpVariable %36 StorageBuffer
|
||||
%38 = OpTypeStruct %19
|
||||
%39 = OpTypePointer StorageBuffer %38
|
||||
%37 = OpVariable %39 StorageBuffer
|
||||
%41 = OpTypeStruct %22
|
||||
%42 = OpTypePointer StorageBuffer %41
|
||||
%40 = OpVariable %42 StorageBuffer
|
||||
%46 = OpTypeFunction %3 %3
|
||||
%47 = OpTypePointer Uniform %19
|
||||
%48 = OpConstant %4 0
|
||||
%50 = OpTypePointer StorageBuffer %19
|
||||
%52 = OpTypePointer StorageBuffer %22
|
||||
%56 = OpConstant %3 8.8991e-41
|
||||
%57 = OpConstant %3 2.4753e-41
|
||||
%59 = OpTypePointer Function %3
|
||||
%67 = OpTypePointer Uniform %6
|
||||
%76 = OpTypePointer Uniform %3
|
||||
%77 = OpConstant %4 3
|
||||
%84 = OpTypePointer StorageBuffer %3
|
||||
%91 = OpTypePointer StorageBuffer %7
|
||||
%92 = OpTypePointer Uniform %7
|
||||
%93 = OpConstant %4 4
|
||||
%100 = OpTypePointer StorageBuffer %8
|
||||
%101 = OpTypePointer Uniform %8
|
||||
%102 = OpConstant %4 5
|
||||
%109 = OpTypePointer StorageBuffer %9
|
||||
%110 = OpTypePointer Uniform %9
|
||||
%111 = OpConstant %4 6
|
||||
%118 = OpTypePointer StorageBuffer %10
|
||||
%119 = OpTypePointer Uniform %10
|
||||
%120 = OpConstant %4 8
|
||||
%133 = OpTypePointer StorageBuffer %11
|
||||
%134 = OpTypePointer Uniform %11
|
||||
%135 = OpConstant %4 9
|
||||
%148 = OpTypePointer StorageBuffer %12
|
||||
%149 = OpTypePointer Uniform %12
|
||||
%150 = OpConstant %4 10
|
||||
%163 = OpTypePointer StorageBuffer %13
|
||||
%164 = OpTypePointer Uniform %13
|
||||
%165 = OpConstant %4 11
|
||||
%181 = OpTypePointer StorageBuffer %14
|
||||
%182 = OpTypePointer Uniform %14
|
||||
%183 = OpConstant %4 12
|
||||
%199 = OpTypePointer StorageBuffer %15
|
||||
%200 = OpTypePointer Uniform %15
|
||||
%201 = OpConstant %4 13
|
||||
%217 = OpTypePointer StorageBuffer %16
|
||||
%218 = OpTypePointer Uniform %16
|
||||
%219 = OpConstant %4 14
|
||||
%238 = OpTypePointer StorageBuffer %17
|
||||
%239 = OpTypePointer Uniform %17
|
||||
%240 = OpConstant %4 15
|
||||
%259 = OpTypePointer StorageBuffer %18
|
||||
%260 = OpTypePointer Uniform %18
|
||||
%261 = OpConstant %4 16
|
||||
%280 = OpTypePointer StorageBuffer %20
|
||||
%319 = OpTypeVector %6 2
|
||||
%325 = OpTypeVector %6 3
|
||||
%331 = OpTypeVector %6 4
|
||||
%337 = OpTypeMatrix %319 2
|
||||
%351 = OpTypeMatrix %325 2
|
||||
%365 = OpTypeMatrix %331 2
|
||||
%379 = OpTypeMatrix %319 3
|
||||
%397 = OpTypeMatrix %325 3
|
||||
%415 = OpTypeMatrix %331 3
|
||||
%433 = OpTypeMatrix %319 4
|
||||
%455 = OpTypeMatrix %325 4
|
||||
%477 = OpTypeMatrix %331 4
|
||||
%500 = OpTypeFunction %2
|
||||
%506 = OpConstant %3 2.2959e-41
|
||||
%509 = OpConstant %4 7
|
||||
%45 = OpFunction %3 None %46
|
||||
%44 = OpFunctionParameter %3
|
||||
%43 = OpLabel
|
||||
%58 = OpVariable %59 Function %25
|
||||
%49 = OpAccessChain %47 %28 %48
|
||||
%51 = OpAccessChain %50 %31 %48
|
||||
%53 = OpAccessChain %52 %34 %48
|
||||
%54 = OpAccessChain %50 %37 %48
|
||||
%55 = OpAccessChain %52 %40 %48
|
||||
OpBranch %60
|
||||
%60 = OpLabel
|
||||
%61 = OpLoad %3 %58
|
||||
%62 = OpFAdd %3 %61 %56
|
||||
OpStore %58 %62
|
||||
%63 = OpLoad %3 %58
|
||||
%64 = OpFAdd %3 %63 %57
|
||||
%65 = OpLoad %3 %58
|
||||
%66 = OpFAdd %3 %65 %64
|
||||
OpStore %58 %66
|
||||
%68 = OpAccessChain %67 %49 %21
|
||||
%69 = OpLoad %6 %68
|
||||
%70 = OpLoad %3 %58
|
||||
%71 = OpFConvert %6 %70
|
||||
%72 = OpFAdd %6 %69 %71
|
||||
%73 = OpFConvert %3 %72
|
||||
%74 = OpLoad %3 %58
|
||||
%75 = OpFAdd %3 %74 %73
|
||||
OpStore %58 %75
|
||||
%78 = OpAccessChain %76 %49 %77
|
||||
%79 = OpLoad %3 %78
|
||||
%80 = OpCompositeConstruct %8 %79 %79 %79
|
||||
%81 = OpCompositeExtract %3 %80 2
|
||||
%82 = OpLoad %3 %58
|
||||
%83 = OpFAdd %3 %82 %81
|
||||
OpStore %58 %83
|
||||
%85 = OpAccessChain %76 %49 %77
|
||||
%86 = OpLoad %3 %85
|
||||
%87 = OpAccessChain %84 %51 %77
|
||||
%88 = OpLoad %3 %87
|
||||
%89 = OpFAdd %3 %86 %88
|
||||
%90 = OpAccessChain %84 %54 %77
|
||||
OpStore %90 %89
|
||||
%94 = OpAccessChain %92 %49 %93
|
||||
%95 = OpLoad %7 %94
|
||||
%96 = OpAccessChain %91 %51 %93
|
||||
%97 = OpLoad %7 %96
|
||||
%98 = OpFAdd %7 %95 %97
|
||||
%99 = OpAccessChain %91 %54 %93
|
||||
OpStore %99 %98
|
||||
%103 = OpAccessChain %101 %49 %102
|
||||
%104 = OpLoad %8 %103
|
||||
%105 = OpAccessChain %100 %51 %102
|
||||
%106 = OpLoad %8 %105
|
||||
%107 = OpFAdd %8 %104 %106
|
||||
%108 = OpAccessChain %100 %54 %102
|
||||
OpStore %108 %107
|
||||
%112 = OpAccessChain %110 %49 %111
|
||||
%113 = OpLoad %9 %112
|
||||
%114 = OpAccessChain %109 %51 %111
|
||||
%115 = OpLoad %9 %114
|
||||
%116 = OpFAdd %9 %113 %115
|
||||
%117 = OpAccessChain %109 %54 %111
|
||||
OpStore %117 %116
|
||||
%121 = OpAccessChain %119 %49 %120
|
||||
%122 = OpLoad %10 %121
|
||||
%123 = OpAccessChain %118 %51 %120
|
||||
%124 = OpLoad %10 %123
|
||||
%126 = OpCompositeExtract %7 %122 0
|
||||
%127 = OpCompositeExtract %7 %124 0
|
||||
%128 = OpFAdd %7 %126 %127
|
||||
%129 = OpCompositeExtract %7 %122 1
|
||||
%130 = OpCompositeExtract %7 %124 1
|
||||
%131 = OpFAdd %7 %129 %130
|
||||
%125 = OpCompositeConstruct %10 %128 %131
|
||||
%132 = OpAccessChain %118 %54 %120
|
||||
OpStore %132 %125
|
||||
%136 = OpAccessChain %134 %49 %135
|
||||
%137 = OpLoad %11 %136
|
||||
%138 = OpAccessChain %133 %51 %135
|
||||
%139 = OpLoad %11 %138
|
||||
%141 = OpCompositeExtract %8 %137 0
|
||||
%142 = OpCompositeExtract %8 %139 0
|
||||
%143 = OpFAdd %8 %141 %142
|
||||
%144 = OpCompositeExtract %8 %137 1
|
||||
%145 = OpCompositeExtract %8 %139 1
|
||||
%146 = OpFAdd %8 %144 %145
|
||||
%140 = OpCompositeConstruct %11 %143 %146
|
||||
%147 = OpAccessChain %133 %54 %135
|
||||
OpStore %147 %140
|
||||
%151 = OpAccessChain %149 %49 %150
|
||||
%152 = OpLoad %12 %151
|
||||
%153 = OpAccessChain %148 %51 %150
|
||||
%154 = OpLoad %12 %153
|
||||
%156 = OpCompositeExtract %9 %152 0
|
||||
%157 = OpCompositeExtract %9 %154 0
|
||||
%158 = OpFAdd %9 %156 %157
|
||||
%159 = OpCompositeExtract %9 %152 1
|
||||
%160 = OpCompositeExtract %9 %154 1
|
||||
%161 = OpFAdd %9 %159 %160
|
||||
%155 = OpCompositeConstruct %12 %158 %161
|
||||
%162 = OpAccessChain %148 %54 %150
|
||||
OpStore %162 %155
|
||||
%166 = OpAccessChain %164 %49 %165
|
||||
%167 = OpLoad %13 %166
|
||||
%168 = OpAccessChain %163 %51 %165
|
||||
%169 = OpLoad %13 %168
|
||||
%171 = OpCompositeExtract %7 %167 0
|
||||
%172 = OpCompositeExtract %7 %169 0
|
||||
%173 = OpFAdd %7 %171 %172
|
||||
%174 = OpCompositeExtract %7 %167 1
|
||||
%175 = OpCompositeExtract %7 %169 1
|
||||
%176 = OpFAdd %7 %174 %175
|
||||
%177 = OpCompositeExtract %7 %167 2
|
||||
%178 = OpCompositeExtract %7 %169 2
|
||||
%179 = OpFAdd %7 %177 %178
|
||||
%170 = OpCompositeConstruct %13 %173 %176 %179
|
||||
%180 = OpAccessChain %163 %54 %165
|
||||
OpStore %180 %170
|
||||
%184 = OpAccessChain %182 %49 %183
|
||||
%185 = OpLoad %14 %184
|
||||
%186 = OpAccessChain %181 %51 %183
|
||||
%187 = OpLoad %14 %186
|
||||
%189 = OpCompositeExtract %8 %185 0
|
||||
%190 = OpCompositeExtract %8 %187 0
|
||||
%191 = OpFAdd %8 %189 %190
|
||||
%192 = OpCompositeExtract %8 %185 1
|
||||
%193 = OpCompositeExtract %8 %187 1
|
||||
%194 = OpFAdd %8 %192 %193
|
||||
%195 = OpCompositeExtract %8 %185 2
|
||||
%196 = OpCompositeExtract %8 %187 2
|
||||
%197 = OpFAdd %8 %195 %196
|
||||
%188 = OpCompositeConstruct %14 %191 %194 %197
|
||||
%198 = OpAccessChain %181 %54 %183
|
||||
OpStore %198 %188
|
||||
%202 = OpAccessChain %200 %49 %201
|
||||
%203 = OpLoad %15 %202
|
||||
%204 = OpAccessChain %199 %51 %201
|
||||
%205 = OpLoad %15 %204
|
||||
%207 = OpCompositeExtract %9 %203 0
|
||||
%208 = OpCompositeExtract %9 %205 0
|
||||
%209 = OpFAdd %9 %207 %208
|
||||
%210 = OpCompositeExtract %9 %203 1
|
||||
%211 = OpCompositeExtract %9 %205 1
|
||||
%212 = OpFAdd %9 %210 %211
|
||||
%213 = OpCompositeExtract %9 %203 2
|
||||
%214 = OpCompositeExtract %9 %205 2
|
||||
%215 = OpFAdd %9 %213 %214
|
||||
%206 = OpCompositeConstruct %15 %209 %212 %215
|
||||
%216 = OpAccessChain %199 %54 %201
|
||||
OpStore %216 %206
|
||||
%220 = OpAccessChain %218 %49 %219
|
||||
%221 = OpLoad %16 %220
|
||||
%222 = OpAccessChain %217 %51 %219
|
||||
%223 = OpLoad %16 %222
|
||||
%225 = OpCompositeExtract %7 %221 0
|
||||
%226 = OpCompositeExtract %7 %223 0
|
||||
%227 = OpFAdd %7 %225 %226
|
||||
%228 = OpCompositeExtract %7 %221 1
|
||||
%229 = OpCompositeExtract %7 %223 1
|
||||
%230 = OpFAdd %7 %228 %229
|
||||
%231 = OpCompositeExtract %7 %221 2
|
||||
%232 = OpCompositeExtract %7 %223 2
|
||||
%233 = OpFAdd %7 %231 %232
|
||||
%234 = OpCompositeExtract %7 %221 3
|
||||
%235 = OpCompositeExtract %7 %223 3
|
||||
%236 = OpFAdd %7 %234 %235
|
||||
%224 = OpCompositeConstruct %16 %227 %230 %233 %236
|
||||
%237 = OpAccessChain %217 %54 %219
|
||||
OpStore %237 %224
|
||||
%241 = OpAccessChain %239 %49 %240
|
||||
%242 = OpLoad %17 %241
|
||||
%243 = OpAccessChain %238 %51 %240
|
||||
%244 = OpLoad %17 %243
|
||||
%246 = OpCompositeExtract %8 %242 0
|
||||
%247 = OpCompositeExtract %8 %244 0
|
||||
%248 = OpFAdd %8 %246 %247
|
||||
%249 = OpCompositeExtract %8 %242 1
|
||||
%250 = OpCompositeExtract %8 %244 1
|
||||
%251 = OpFAdd %8 %249 %250
|
||||
%252 = OpCompositeExtract %8 %242 2
|
||||
%253 = OpCompositeExtract %8 %244 2
|
||||
%254 = OpFAdd %8 %252 %253
|
||||
%255 = OpCompositeExtract %8 %242 3
|
||||
%256 = OpCompositeExtract %8 %244 3
|
||||
%257 = OpFAdd %8 %255 %256
|
||||
%245 = OpCompositeConstruct %17 %248 %251 %254 %257
|
||||
%258 = OpAccessChain %238 %54 %240
|
||||
OpStore %258 %245
|
||||
%262 = OpAccessChain %260 %49 %261
|
||||
%263 = OpLoad %18 %262
|
||||
%264 = OpAccessChain %259 %51 %261
|
||||
%265 = OpLoad %18 %264
|
||||
%267 = OpCompositeExtract %9 %263 0
|
||||
%268 = OpCompositeExtract %9 %265 0
|
||||
%269 = OpFAdd %9 %267 %268
|
||||
%270 = OpCompositeExtract %9 %263 1
|
||||
%271 = OpCompositeExtract %9 %265 1
|
||||
%272 = OpFAdd %9 %270 %271
|
||||
%273 = OpCompositeExtract %9 %263 2
|
||||
%274 = OpCompositeExtract %9 %265 2
|
||||
%275 = OpFAdd %9 %273 %274
|
||||
%276 = OpCompositeExtract %9 %263 3
|
||||
%277 = OpCompositeExtract %9 %265 3
|
||||
%278 = OpFAdd %9 %276 %277
|
||||
%266 = OpCompositeConstruct %18 %269 %272 %275 %278
|
||||
%279 = OpAccessChain %259 %54 %261
|
||||
OpStore %279 %266
|
||||
%281 = OpAccessChain %280 %53 %48
|
||||
%282 = OpLoad %20 %281
|
||||
%283 = OpAccessChain %280 %55 %48
|
||||
OpStore %283 %282
|
||||
%284 = OpLoad %3 %58
|
||||
%285 = OpExtInst %3 %1 FAbs %284
|
||||
%286 = OpLoad %3 %58
|
||||
%287 = OpFAdd %3 %286 %285
|
||||
OpStore %58 %287
|
||||
%288 = OpLoad %3 %58
|
||||
%289 = OpLoad %3 %58
|
||||
%290 = OpLoad %3 %58
|
||||
%291 = OpExtInst %3 %1 FClamp %288 %289 %290
|
||||
%292 = OpLoad %3 %58
|
||||
%293 = OpFAdd %3 %292 %291
|
||||
OpStore %58 %293
|
||||
%294 = OpLoad %3 %58
|
||||
%295 = OpCompositeConstruct %7 %294 %294
|
||||
%296 = OpLoad %3 %58
|
||||
%297 = OpCompositeConstruct %7 %296 %296
|
||||
%298 = OpDot %3 %295 %297
|
||||
%299 = OpLoad %3 %58
|
||||
%300 = OpFAdd %3 %299 %298
|
||||
OpStore %58 %300
|
||||
%301 = OpLoad %3 %58
|
||||
%302 = OpLoad %3 %58
|
||||
%303 = OpExtInst %3 %1 FMax %301 %302
|
||||
%304 = OpLoad %3 %58
|
||||
%305 = OpFAdd %3 %304 %303
|
||||
OpStore %58 %305
|
||||
%306 = OpLoad %3 %58
|
||||
%307 = OpLoad %3 %58
|
||||
%308 = OpExtInst %3 %1 FMin %306 %307
|
||||
%309 = OpLoad %3 %58
|
||||
%310 = OpFAdd %3 %309 %308
|
||||
OpStore %58 %310
|
||||
%311 = OpLoad %3 %58
|
||||
%312 = OpExtInst %3 %1 FSign %311
|
||||
%313 = OpLoad %3 %58
|
||||
%314 = OpFAdd %3 %313 %312
|
||||
OpStore %58 %314
|
||||
%315 = OpLoad %3 %58
|
||||
%316 = OpFAdd %3 %315 %24
|
||||
OpStore %58 %316
|
||||
%317 = OpAccessChain %92 %49 %93
|
||||
%318 = OpLoad %7 %317
|
||||
%320 = OpFConvert %319 %318
|
||||
%321 = OpFConvert %7 %320
|
||||
%322 = OpAccessChain %91 %54 %93
|
||||
OpStore %322 %321
|
||||
%323 = OpAccessChain %101 %49 %102
|
||||
%324 = OpLoad %8 %323
|
||||
%326 = OpFConvert %325 %324
|
||||
%327 = OpFConvert %8 %326
|
||||
%328 = OpAccessChain %100 %54 %102
|
||||
OpStore %328 %327
|
||||
%329 = OpAccessChain %110 %49 %111
|
||||
%330 = OpLoad %9 %329
|
||||
%332 = OpFConvert %331 %330
|
||||
%333 = OpFConvert %9 %332
|
||||
%334 = OpAccessChain %109 %54 %111
|
||||
OpStore %334 %333
|
||||
%335 = OpAccessChain %119 %49 %120
|
||||
%336 = OpLoad %10 %335
|
||||
%338 = OpCompositeExtract %7 %336 0
|
||||
%339 = OpFConvert %319 %338
|
||||
%340 = OpCompositeExtract %7 %336 1
|
||||
%341 = OpFConvert %319 %340
|
||||
%342 = OpCompositeConstruct %337 %339 %341
|
||||
%343 = OpCompositeExtract %319 %342 0
|
||||
%344 = OpFConvert %7 %343
|
||||
%345 = OpCompositeExtract %319 %342 1
|
||||
%346 = OpFConvert %7 %345
|
||||
%347 = OpCompositeConstruct %10 %344 %346
|
||||
%348 = OpAccessChain %118 %54 %120
|
||||
OpStore %348 %347
|
||||
%349 = OpAccessChain %134 %49 %135
|
||||
%350 = OpLoad %11 %349
|
||||
%352 = OpCompositeExtract %8 %350 0
|
||||
%353 = OpFConvert %325 %352
|
||||
%354 = OpCompositeExtract %8 %350 1
|
||||
%355 = OpFConvert %325 %354
|
||||
%356 = OpCompositeConstruct %351 %353 %355
|
||||
%357 = OpCompositeExtract %325 %356 0
|
||||
%358 = OpFConvert %8 %357
|
||||
%359 = OpCompositeExtract %325 %356 1
|
||||
%360 = OpFConvert %8 %359
|
||||
%361 = OpCompositeConstruct %11 %358 %360
|
||||
%362 = OpAccessChain %133 %54 %135
|
||||
OpStore %362 %361
|
||||
%363 = OpAccessChain %149 %49 %150
|
||||
%364 = OpLoad %12 %363
|
||||
%366 = OpCompositeExtract %9 %364 0
|
||||
%367 = OpFConvert %331 %366
|
||||
%368 = OpCompositeExtract %9 %364 1
|
||||
%369 = OpFConvert %331 %368
|
||||
%370 = OpCompositeConstruct %365 %367 %369
|
||||
%371 = OpCompositeExtract %331 %370 0
|
||||
%372 = OpFConvert %9 %371
|
||||
%373 = OpCompositeExtract %331 %370 1
|
||||
%374 = OpFConvert %9 %373
|
||||
%375 = OpCompositeConstruct %12 %372 %374
|
||||
%376 = OpAccessChain %148 %54 %150
|
||||
OpStore %376 %375
|
||||
%377 = OpAccessChain %164 %49 %165
|
||||
%378 = OpLoad %13 %377
|
||||
%380 = OpCompositeExtract %7 %378 0
|
||||
%381 = OpFConvert %319 %380
|
||||
%382 = OpCompositeExtract %7 %378 1
|
||||
%383 = OpFConvert %319 %382
|
||||
%384 = OpCompositeExtract %7 %378 2
|
||||
%385 = OpFConvert %319 %384
|
||||
%386 = OpCompositeConstruct %379 %381 %383 %385
|
||||
%387 = OpCompositeExtract %319 %386 0
|
||||
%388 = OpFConvert %7 %387
|
||||
%389 = OpCompositeExtract %319 %386 1
|
||||
%390 = OpFConvert %7 %389
|
||||
%391 = OpCompositeExtract %319 %386 2
|
||||
%392 = OpFConvert %7 %391
|
||||
%393 = OpCompositeConstruct %13 %388 %390 %392
|
||||
%394 = OpAccessChain %163 %54 %165
|
||||
OpStore %394 %393
|
||||
%395 = OpAccessChain %182 %49 %183
|
||||
%396 = OpLoad %14 %395
|
||||
%398 = OpCompositeExtract %8 %396 0
|
||||
%399 = OpFConvert %325 %398
|
||||
%400 = OpCompositeExtract %8 %396 1
|
||||
%401 = OpFConvert %325 %400
|
||||
%402 = OpCompositeExtract %8 %396 2
|
||||
%403 = OpFConvert %325 %402
|
||||
%404 = OpCompositeConstruct %397 %399 %401 %403
|
||||
%405 = OpCompositeExtract %325 %404 0
|
||||
%406 = OpFConvert %8 %405
|
||||
%407 = OpCompositeExtract %325 %404 1
|
||||
%408 = OpFConvert %8 %407
|
||||
%409 = OpCompositeExtract %325 %404 2
|
||||
%410 = OpFConvert %8 %409
|
||||
%411 = OpCompositeConstruct %14 %406 %408 %410
|
||||
%412 = OpAccessChain %181 %54 %183
|
||||
OpStore %412 %411
|
||||
%413 = OpAccessChain %200 %49 %201
|
||||
%414 = OpLoad %15 %413
|
||||
%416 = OpCompositeExtract %9 %414 0
|
||||
%417 = OpFConvert %331 %416
|
||||
%418 = OpCompositeExtract %9 %414 1
|
||||
%419 = OpFConvert %331 %418
|
||||
%420 = OpCompositeExtract %9 %414 2
|
||||
%421 = OpFConvert %331 %420
|
||||
%422 = OpCompositeConstruct %415 %417 %419 %421
|
||||
%423 = OpCompositeExtract %331 %422 0
|
||||
%424 = OpFConvert %9 %423
|
||||
%425 = OpCompositeExtract %331 %422 1
|
||||
%426 = OpFConvert %9 %425
|
||||
%427 = OpCompositeExtract %331 %422 2
|
||||
%428 = OpFConvert %9 %427
|
||||
%429 = OpCompositeConstruct %15 %424 %426 %428
|
||||
%430 = OpAccessChain %199 %54 %201
|
||||
OpStore %430 %429
|
||||
%431 = OpAccessChain %218 %49 %219
|
||||
%432 = OpLoad %16 %431
|
||||
%434 = OpCompositeExtract %7 %432 0
|
||||
%435 = OpFConvert %319 %434
|
||||
%436 = OpCompositeExtract %7 %432 1
|
||||
%437 = OpFConvert %319 %436
|
||||
%438 = OpCompositeExtract %7 %432 2
|
||||
%439 = OpFConvert %319 %438
|
||||
%440 = OpCompositeExtract %7 %432 3
|
||||
%441 = OpFConvert %319 %440
|
||||
%442 = OpCompositeConstruct %433 %435 %437 %439 %441
|
||||
%443 = OpCompositeExtract %319 %442 0
|
||||
%444 = OpFConvert %7 %443
|
||||
%445 = OpCompositeExtract %319 %442 1
|
||||
%446 = OpFConvert %7 %445
|
||||
%447 = OpCompositeExtract %319 %442 2
|
||||
%448 = OpFConvert %7 %447
|
||||
%449 = OpCompositeExtract %319 %442 3
|
||||
%450 = OpFConvert %7 %449
|
||||
%451 = OpCompositeConstruct %16 %444 %446 %448 %450
|
||||
%452 = OpAccessChain %217 %54 %219
|
||||
OpStore %452 %451
|
||||
%453 = OpAccessChain %239 %49 %240
|
||||
%454 = OpLoad %17 %453
|
||||
%456 = OpCompositeExtract %8 %454 0
|
||||
%457 = OpFConvert %325 %456
|
||||
%458 = OpCompositeExtract %8 %454 1
|
||||
%459 = OpFConvert %325 %458
|
||||
%460 = OpCompositeExtract %8 %454 2
|
||||
%461 = OpFConvert %325 %460
|
||||
%462 = OpCompositeExtract %8 %454 3
|
||||
%463 = OpFConvert %325 %462
|
||||
%464 = OpCompositeConstruct %455 %457 %459 %461 %463
|
||||
%465 = OpCompositeExtract %325 %464 0
|
||||
%466 = OpFConvert %8 %465
|
||||
%467 = OpCompositeExtract %325 %464 1
|
||||
%468 = OpFConvert %8 %467
|
||||
%469 = OpCompositeExtract %325 %464 2
|
||||
%470 = OpFConvert %8 %469
|
||||
%471 = OpCompositeExtract %325 %464 3
|
||||
%472 = OpFConvert %8 %471
|
||||
%473 = OpCompositeConstruct %17 %466 %468 %470 %472
|
||||
%474 = OpAccessChain %238 %54 %240
|
||||
OpStore %474 %473
|
||||
%475 = OpAccessChain %260 %49 %261
|
||||
%476 = OpLoad %18 %475
|
||||
%478 = OpCompositeExtract %9 %476 0
|
||||
%479 = OpFConvert %331 %478
|
||||
%480 = OpCompositeExtract %9 %476 1
|
||||
%481 = OpFConvert %331 %480
|
||||
%482 = OpCompositeExtract %9 %476 2
|
||||
%483 = OpFConvert %331 %482
|
||||
%484 = OpCompositeExtract %9 %476 3
|
||||
%485 = OpFConvert %331 %484
|
||||
%486 = OpCompositeConstruct %477 %479 %481 %483 %485
|
||||
%487 = OpCompositeExtract %331 %486 0
|
||||
%488 = OpFConvert %9 %487
|
||||
%489 = OpCompositeExtract %331 %486 1
|
||||
%490 = OpFConvert %9 %489
|
||||
%491 = OpCompositeExtract %331 %486 2
|
||||
%492 = OpFConvert %9 %491
|
||||
%493 = OpCompositeExtract %331 %486 3
|
||||
%494 = OpFConvert %9 %493
|
||||
%495 = OpCompositeConstruct %18 %488 %490 %492 %494
|
||||
%496 = OpAccessChain %259 %54 %261
|
||||
OpStore %496 %495
|
||||
%497 = OpLoad %3 %58
|
||||
OpReturnValue %497
|
||||
OpFunctionEnd
|
||||
%499 = OpFunction %2 None %500
|
||||
%498 = OpLabel
|
||||
%501 = OpAccessChain %47 %28 %48
|
||||
%502 = OpAccessChain %50 %31 %48
|
||||
%503 = OpAccessChain %52 %34 %48
|
||||
%504 = OpAccessChain %50 %37 %48
|
||||
%505 = OpAccessChain %52 %40 %48
|
||||
OpBranch %507
|
||||
%507 = OpLabel
|
||||
%508 = OpFunctionCall %3 %45 %506
|
||||
%510 = OpAccessChain %84 %504 %509
|
||||
OpStore %510 %508
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
struct PushConstants {
|
||||
index: u32,
|
||||
double: vec2<f64>,
|
||||
double: vec2<f32>,
|
||||
}
|
||||
|
||||
struct FragmentIn {
|
||||
|
||||
47
naga/tests/out/wgsl/f16-glsl.comp.wgsl
Normal file
47
naga/tests/out/wgsl/f16-glsl.comp.wgsl
Normal file
@@ -0,0 +1,47 @@
|
||||
enable f16;
|
||||
|
||||
struct A {
|
||||
a_1_: f16,
|
||||
a_vec2_: vec2<f16>,
|
||||
a_vec3_: vec3<f16>,
|
||||
a_vec4_: vec4<f16>,
|
||||
}
|
||||
|
||||
struct B {
|
||||
b_1_: f16,
|
||||
b_vec2_: vec2<f16>,
|
||||
b_vec3_: vec3<f16>,
|
||||
b_vec4_: vec4<f16>,
|
||||
b_mat2_: mat2x2<f16>,
|
||||
b_mat2x3_: mat2x3<f16>,
|
||||
b_mat2x4_: mat2x4<f16>,
|
||||
b_mat3x2_: mat3x2<f16>,
|
||||
b_mat3_: mat3x3<f16>,
|
||||
b_mat3x4_: mat3x4<f16>,
|
||||
b_mat4x2_: mat4x2<f16>,
|
||||
b_mat4x3_: mat4x3<f16>,
|
||||
b_mat4_: mat4x4<f16>,
|
||||
}
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<uniform> global: A;
|
||||
@group(0) @binding(1)
|
||||
var<storage, read_write> global_1: B;
|
||||
|
||||
fn main_1() {
|
||||
let _e16 = global.a_1_;
|
||||
global_1.b_1_ = _e16;
|
||||
let _e17 = global.a_vec2_;
|
||||
global_1.b_vec2_ = _e17;
|
||||
let _e18 = global.a_vec3_;
|
||||
global_1.b_vec3_ = _e18;
|
||||
let _e19 = global.a_vec4_;
|
||||
global_1.b_vec4_ = _e19;
|
||||
return;
|
||||
}
|
||||
|
||||
@compute @workgroup_size(1, 1, 1)
|
||||
fn main() {
|
||||
main_1();
|
||||
return;
|
||||
}
|
||||
46
naga/tests/out/wgsl/f16-spv.wgsl
Normal file
46
naga/tests/out/wgsl/f16-spv.wgsl
Normal file
@@ -0,0 +1,46 @@
|
||||
enable f16;
|
||||
|
||||
struct B {
|
||||
b_1_: f16,
|
||||
b_vec2_: vec2<f16>,
|
||||
b_vec3_: vec3<f16>,
|
||||
b_vec4_: vec4<f16>,
|
||||
b_mat2_: mat2x2<f16>,
|
||||
b_mat2x3_: mat2x3<f16>,
|
||||
b_mat2x4_: mat2x4<f16>,
|
||||
b_mat3x2_: mat3x2<f16>,
|
||||
b_mat3_: mat3x3<f16>,
|
||||
b_mat3x4_: mat3x4<f16>,
|
||||
b_mat4x2_: mat4x2<f16>,
|
||||
b_mat4x3_: mat4x3<f16>,
|
||||
b_mat4_: mat4x4<f16>,
|
||||
}
|
||||
|
||||
struct A {
|
||||
a_1_: f16,
|
||||
a_vec2_: vec2<f16>,
|
||||
a_vec3_: vec3<f16>,
|
||||
a_vec4_: vec4<f16>,
|
||||
}
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage, read_write> unnamed: B;
|
||||
@group(0) @binding(0)
|
||||
var<uniform> unnamed_1: A;
|
||||
|
||||
fn main_1() {
|
||||
let _e3 = unnamed_1.a_1_;
|
||||
unnamed.b_1_ = _e3;
|
||||
let _e6 = unnamed_1.a_vec2_;
|
||||
unnamed.b_vec2_ = _e6;
|
||||
let _e9 = unnamed_1.a_vec3_;
|
||||
unnamed.b_vec3_ = _e9;
|
||||
let _e12 = unnamed_1.a_vec4_;
|
||||
unnamed.b_vec4_ = _e12;
|
||||
return;
|
||||
}
|
||||
|
||||
@compute @workgroup_size(1, 1, 1)
|
||||
fn main() {
|
||||
main_1();
|
||||
}
|
||||
167
naga/tests/out/wgsl/f16.wgsl
Normal file
167
naga/tests/out/wgsl/f16.wgsl
Normal file
@@ -0,0 +1,167 @@
|
||||
enable f16;
|
||||
|
||||
struct UniformCompatible {
|
||||
val_u32_: u32,
|
||||
val_i32_: i32,
|
||||
val_f32_: f32,
|
||||
val_f16_: f16,
|
||||
val_f16_2_: vec2<f16>,
|
||||
val_f16_3_: vec3<f16>,
|
||||
val_f16_4_: vec4<f16>,
|
||||
final_value: f16,
|
||||
val_mat2x2_: mat2x2<f16>,
|
||||
val_mat2x3_: mat2x3<f16>,
|
||||
val_mat2x4_: mat2x4<f16>,
|
||||
val_mat3x2_: mat3x2<f16>,
|
||||
val_mat3x3_: mat3x3<f16>,
|
||||
val_mat3x4_: mat3x4<f16>,
|
||||
val_mat4x2_: mat4x2<f16>,
|
||||
val_mat4x3_: mat4x3<f16>,
|
||||
val_mat4x4_: mat4x4<f16>,
|
||||
}
|
||||
|
||||
struct StorageCompatible {
|
||||
val_f16_array_2_: array<f16, 2>,
|
||||
}
|
||||
|
||||
struct LayoutTest {
|
||||
scalar1_: f16,
|
||||
scalar2_: f16,
|
||||
v3_: vec3<f16>,
|
||||
tuck_in: f16,
|
||||
scalar4_: f16,
|
||||
larger: u32,
|
||||
}
|
||||
|
||||
const constant_variable: f16 = 15.203125h;
|
||||
|
||||
var<private> private_variable: f16 = 1h;
|
||||
@group(0) @binding(0)
|
||||
var<uniform> input_uniform: UniformCompatible;
|
||||
@group(0) @binding(1)
|
||||
var<storage> input_storage: UniformCompatible;
|
||||
@group(0) @binding(2)
|
||||
var<storage> input_arrays: StorageCompatible;
|
||||
@group(0) @binding(3)
|
||||
var<storage, read_write> output: UniformCompatible;
|
||||
@group(0) @binding(4)
|
||||
var<storage, read_write> output_arrays: StorageCompatible;
|
||||
|
||||
fn f16_function(x: f16) -> f16 {
|
||||
var val: f16 = 15.203125h;
|
||||
|
||||
let _e4 = val;
|
||||
val = (_e4 + -33344h);
|
||||
let _e6 = val;
|
||||
let _e9 = val;
|
||||
val = (_e9 + (_e6 + 5h));
|
||||
let _e13 = input_uniform.val_f32_;
|
||||
let _e14 = val;
|
||||
let _e18 = val;
|
||||
val = (_e18 + f16((_e13 + f32(_e14))));
|
||||
let _e22 = input_uniform.val_f16_;
|
||||
let _e25 = val;
|
||||
val = (_e25 + vec3(_e22).z);
|
||||
let _e31 = input_uniform.val_f16_;
|
||||
let _e34 = input_storage.val_f16_;
|
||||
output.val_f16_ = (_e31 + _e34);
|
||||
let _e40 = input_uniform.val_f16_2_;
|
||||
let _e43 = input_storage.val_f16_2_;
|
||||
output.val_f16_2_ = (_e40 + _e43);
|
||||
let _e49 = input_uniform.val_f16_3_;
|
||||
let _e52 = input_storage.val_f16_3_;
|
||||
output.val_f16_3_ = (_e49 + _e52);
|
||||
let _e58 = input_uniform.val_f16_4_;
|
||||
let _e61 = input_storage.val_f16_4_;
|
||||
output.val_f16_4_ = (_e58 + _e61);
|
||||
let _e67 = input_uniform.val_mat2x2_;
|
||||
let _e70 = input_storage.val_mat2x2_;
|
||||
output.val_mat2x2_ = (_e67 + _e70);
|
||||
let _e76 = input_uniform.val_mat2x3_;
|
||||
let _e79 = input_storage.val_mat2x3_;
|
||||
output.val_mat2x3_ = (_e76 + _e79);
|
||||
let _e85 = input_uniform.val_mat2x4_;
|
||||
let _e88 = input_storage.val_mat2x4_;
|
||||
output.val_mat2x4_ = (_e85 + _e88);
|
||||
let _e94 = input_uniform.val_mat3x2_;
|
||||
let _e97 = input_storage.val_mat3x2_;
|
||||
output.val_mat3x2_ = (_e94 + _e97);
|
||||
let _e103 = input_uniform.val_mat3x3_;
|
||||
let _e106 = input_storage.val_mat3x3_;
|
||||
output.val_mat3x3_ = (_e103 + _e106);
|
||||
let _e112 = input_uniform.val_mat3x4_;
|
||||
let _e115 = input_storage.val_mat3x4_;
|
||||
output.val_mat3x4_ = (_e112 + _e115);
|
||||
let _e121 = input_uniform.val_mat4x2_;
|
||||
let _e124 = input_storage.val_mat4x2_;
|
||||
output.val_mat4x2_ = (_e121 + _e124);
|
||||
let _e130 = input_uniform.val_mat4x3_;
|
||||
let _e133 = input_storage.val_mat4x3_;
|
||||
output.val_mat4x3_ = (_e130 + _e133);
|
||||
let _e139 = input_uniform.val_mat4x4_;
|
||||
let _e142 = input_storage.val_mat4x4_;
|
||||
output.val_mat4x4_ = (_e139 + _e142);
|
||||
let _e148 = input_arrays.val_f16_array_2_;
|
||||
output_arrays.val_f16_array_2_ = _e148;
|
||||
let _e149 = val;
|
||||
let _e151 = val;
|
||||
val = (_e151 + abs(_e149));
|
||||
let _e153 = val;
|
||||
let _e154 = val;
|
||||
let _e155 = val;
|
||||
let _e157 = val;
|
||||
val = (_e157 + clamp(_e153, _e154, _e155));
|
||||
let _e159 = val;
|
||||
let _e161 = val;
|
||||
let _e164 = val;
|
||||
val = (_e164 + dot(vec2(_e159), vec2(_e161)));
|
||||
let _e166 = val;
|
||||
let _e167 = val;
|
||||
let _e169 = val;
|
||||
val = (_e169 + max(_e166, _e167));
|
||||
let _e171 = val;
|
||||
let _e172 = val;
|
||||
let _e174 = val;
|
||||
val = (_e174 + min(_e171, _e172));
|
||||
let _e176 = val;
|
||||
let _e178 = val;
|
||||
val = (_e178 + sign(_e176));
|
||||
let _e181 = val;
|
||||
val = (_e181 + 1h);
|
||||
let _e185 = input_uniform.val_f16_2_;
|
||||
let float_vec2_ = vec2<f32>(_e185);
|
||||
output.val_f16_2_ = vec2<f16>(float_vec2_);
|
||||
let _e192 = input_uniform.val_f16_3_;
|
||||
let float_vec3_ = vec3<f32>(_e192);
|
||||
output.val_f16_3_ = vec3<f16>(float_vec3_);
|
||||
let _e199 = input_uniform.val_f16_4_;
|
||||
let float_vec4_ = vec4<f32>(_e199);
|
||||
output.val_f16_4_ = vec4<f16>(float_vec4_);
|
||||
let _e208 = input_uniform.val_mat2x2_;
|
||||
output.val_mat2x2_ = mat2x2<f16>(mat2x2<f32>(_e208));
|
||||
let _e215 = input_uniform.val_mat2x3_;
|
||||
output.val_mat2x3_ = mat2x3<f16>(mat2x3<f32>(_e215));
|
||||
let _e222 = input_uniform.val_mat2x4_;
|
||||
output.val_mat2x4_ = mat2x4<f16>(mat2x4<f32>(_e222));
|
||||
let _e229 = input_uniform.val_mat3x2_;
|
||||
output.val_mat3x2_ = mat3x2<f16>(mat3x2<f32>(_e229));
|
||||
let _e236 = input_uniform.val_mat3x3_;
|
||||
output.val_mat3x3_ = mat3x3<f16>(mat3x3<f32>(_e236));
|
||||
let _e243 = input_uniform.val_mat3x4_;
|
||||
output.val_mat3x4_ = mat3x4<f16>(mat3x4<f32>(_e243));
|
||||
let _e250 = input_uniform.val_mat4x2_;
|
||||
output.val_mat4x2_ = mat4x2<f16>(mat4x2<f32>(_e250));
|
||||
let _e257 = input_uniform.val_mat4x3_;
|
||||
output.val_mat4x3_ = mat4x3<f16>(mat4x3<f32>(_e257));
|
||||
let _e264 = input_uniform.val_mat4x4_;
|
||||
output.val_mat4x4_ = mat4x4<f16>(mat4x4<f32>(_e264));
|
||||
let _e267 = val;
|
||||
return _e267;
|
||||
}
|
||||
|
||||
@compute @workgroup_size(1, 1, 1)
|
||||
fn main() {
|
||||
let _e3 = f16_function(2h);
|
||||
output.final_value = _e3;
|
||||
return;
|
||||
}
|
||||
@@ -208,3 +208,8 @@ fn int64() {
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float16() {
|
||||
require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
|
||||
}
|
||||
|
||||
@@ -8,7 +8,8 @@ use naga::valid::Capabilities;
|
||||
#[track_caller]
|
||||
fn check(input: &str, snapshot: &str) {
|
||||
let output = naga::front::wgsl::parse_str(input)
|
||||
.expect_err("expected parser error")
|
||||
.map(|_| panic!("expected parser error, but parsing succeeded!"))
|
||||
.unwrap_err()
|
||||
.emit_to_string(input);
|
||||
if output != snapshot {
|
||||
for diff in diff::lines(snapshot, &output) {
|
||||
@@ -22,6 +23,19 @@ fn check(input: &str, snapshot: &str) {
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn check_success(input: &str) {
|
||||
match naga::front::wgsl::parse_str(input) {
|
||||
Ok(_) => {}
|
||||
Err(err) => {
|
||||
panic!(
|
||||
"expected success, but parsing failed with:\n{}",
|
||||
err.emit_to_string(input)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn very_negative_integers() {
|
||||
// wgpu#4492
|
||||
@@ -827,6 +841,50 @@ fn matrix_constructor_inferred() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float16_requires_enable() {
|
||||
check(
|
||||
r#"
|
||||
const a: f16 = 1.0;
|
||||
"#,
|
||||
r#"error: the `f16` enable extension is not enabled
|
||||
┌─ wgsl:2:22
|
||||
│
|
||||
2 │ const a: f16 = 1.0;
|
||||
│ ^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
|
||||
│
|
||||
= note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
|
||||
|
||||
"#,
|
||||
);
|
||||
|
||||
check(
|
||||
r#"
|
||||
const a = 1.0h;
|
||||
"#,
|
||||
r#"error: the `f16` enable extension is not enabled
|
||||
┌─ wgsl:2:23
|
||||
│
|
||||
2 │ const a = 1.0h;
|
||||
│ ^^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
|
||||
│
|
||||
= note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
|
||||
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_enables_valid() {
|
||||
check_success(
|
||||
r#"
|
||||
enable f16;
|
||||
enable f16;
|
||||
const a: f16 = 1.0h;
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
/// Check the result of validating a WGSL program against a pattern.
|
||||
///
|
||||
/// Unless you are generating code programmatically, the
|
||||
@@ -932,6 +990,53 @@ fn int64_capability() {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float16_capability() {
|
||||
check_validation! {
|
||||
"enable f16; var input: f16;",
|
||||
"enable f16; var input: vec2<f16>;":
|
||||
Err(naga::valid::ValidationError::Type {
|
||||
source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}),
|
||||
..
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float16_in_push_constant() {
|
||||
check_validation! {
|
||||
"enable f16; var<push_constant> input: f16;",
|
||||
"enable f16; var<push_constant> input: vec2<f16>;",
|
||||
"enable f16; var<push_constant> input: mat4x4<f16>;",
|
||||
"enable f16; struct S { a: f16 }; var<push_constant> input: S;",
|
||||
"enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var<push_constant> input: S2;":
|
||||
Err(naga::valid::ValidationError::GlobalVariable {
|
||||
source: naga::valid::GlobalVariableError::InvalidPushConstantType(
|
||||
naga::valid::PushConstantError::InvalidScalar(
|
||||
naga::Scalar::F16
|
||||
)
|
||||
),
|
||||
..
|
||||
}),
|
||||
naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float16_in_atomic() {
|
||||
check_validation! {
|
||||
"enable f16; var<storage> a: atomic<f16>;":
|
||||
Err(naga::valid::ValidationError::Type {
|
||||
source: naga::valid::TypeError::InvalidAtomicWidth(
|
||||
naga::ScalarKind::Float,
|
||||
2
|
||||
),
|
||||
..
|
||||
}),
|
||||
naga::valid::Capabilities::SHADER_FLOAT16
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_arrays() {
|
||||
check_validation! {
|
||||
@@ -1356,11 +1461,13 @@ fn invalid_blend_src() {
|
||||
@fragment
|
||||
fn main(@builtin(position) position: vec4<f32>) -> FragmentOutput { return FragmentOutput(vec4(0.0), vec4(0.0)); }
|
||||
",
|
||||
r###"error: `dual_source_blending` enable-extension is not enabled
|
||||
r###"error: the `dual_source_blending` enable extension is not enabled
|
||||
┌─ wgsl:3:27
|
||||
│
|
||||
3 │ @location(0) @blend_src(0) output0: vec4<f32>,
|
||||
│ ^^^^^^^^^ the `dual_source_blending` enable-extension is needed for this functionality, but it is not currently enabled
|
||||
│ ^^^^^^^^^ the `dual_source_blending` "Enable Extension" is needed for this functionality, but it is not currently enabled.
|
||||
│
|
||||
= note: You can enable this extension by adding `enable dual_source_blending;` at the top of the shader, before any other items.
|
||||
|
||||
"###,
|
||||
);
|
||||
|
||||
@@ -49,6 +49,7 @@ cfg-if.workspace = true
|
||||
ctor.workspace = true
|
||||
futures-lite.workspace = true
|
||||
glam.workspace = true
|
||||
half = { workspace = true, features = ["bytemuck"] }
|
||||
itertools.workspace = true
|
||||
image.workspace = true
|
||||
libtest-mimic.workspace = true
|
||||
|
||||
@@ -143,8 +143,8 @@ impl ShaderTest {
|
||||
body,
|
||||
input_type: String::from("CustomStruct"),
|
||||
output_type: String::from("array<u32>"),
|
||||
input_values: bytemuck::cast_slice(input_values).to_vec(),
|
||||
output_values: vec![bytemuck::cast_slice(output_values).to_vec()],
|
||||
input_values: bytemuck::pod_collect_to_vec(input_values),
|
||||
output_values: vec![bytemuck::pod_collect_to_vec(output_values)],
|
||||
output_comparison_fn: Self::default_comparison_function::<O>,
|
||||
output_initialization: u32::MAX,
|
||||
failures: Backends::empty(),
|
||||
|
||||
@@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits};
|
||||
use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
|
||||
use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
|
||||
|
||||
#[gpu_test]
|
||||
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
// Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
|
||||
.expect_fail(
|
||||
FailureCase::backend(wgpu::Backends::VULKAN)
|
||||
.validation_error("a matrix with stride 8 not satisfying alignment to 16"),
|
||||
)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Uniform,
|
||||
create_struct_layout_tests(InputStorageType::Uniform),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Storage,
|
||||
create_struct_layout_tests(InputStorageType::Storage),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::PUSH_CONSTANTS)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits {
|
||||
max_push_constant_size: MAX_BUFFER_SIZE as u32,
|
||||
..Limits::downlevel_defaults()
|
||||
}),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::PushConstant,
|
||||
create_struct_layout_tests(InputStorageType::PushConstant),
|
||||
)
|
||||
});
|
||||
|
||||
fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
|
||||
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
|
||||
|
||||
@@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
|
||||
tests
|
||||
}
|
||||
|
||||
#[gpu_test]
|
||||
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_INT64)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Storage,
|
||||
create_64bit_struct_layout_tests(),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_INT64)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Storage,
|
||||
create_64bit_struct_layout_tests(),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits {
|
||||
max_push_constant_size: MAX_BUFFER_SIZE as u32,
|
||||
..Limits::downlevel_defaults()
|
||||
}),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::PushConstant,
|
||||
create_64bit_struct_layout_tests(),
|
||||
)
|
||||
});
|
||||
|
||||
fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
|
||||
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
|
||||
|
||||
@@ -356,29 +461,10 @@ fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
|
||||
}
|
||||
|
||||
#[gpu_test]
|
||||
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
// Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
|
||||
.expect_fail(
|
||||
FailureCase::backend(wgpu::Backends::VULKAN)
|
||||
.validation_error("a matrix with stride 8 not satisfying alignment to 16"),
|
||||
)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Uniform,
|
||||
create_struct_layout_tests(InputStorageType::Uniform),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_F16)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
@@ -386,34 +472,15 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Storage,
|
||||
create_struct_layout_tests(InputStorageType::Storage),
|
||||
create_16bit_struct_layout_test(),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::PUSH_CONSTANTS)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits {
|
||||
max_push_constant_size: MAX_BUFFER_SIZE as u32,
|
||||
..Limits::downlevel_defaults()
|
||||
}),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::PushConstant,
|
||||
create_struct_layout_tests(InputStorageType::PushConstant),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_INT64)
|
||||
.features(Features::SHADER_F16)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
@@ -421,41 +488,138 @@ static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Storage,
|
||||
create_64bit_struct_layout_tests(),
|
||||
create_16bit_struct_layout_test(),
|
||||
)
|
||||
});
|
||||
|
||||
#[gpu_test]
|
||||
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_INT64)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults()),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::Storage,
|
||||
create_64bit_struct_layout_tests(),
|
||||
)
|
||||
});
|
||||
fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
|
||||
let mut tests = Vec::new();
|
||||
|
||||
#[gpu_test]
|
||||
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(
|
||||
TestParameters::default()
|
||||
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits {
|
||||
max_push_constant_size: MAX_BUFFER_SIZE as u32,
|
||||
..Limits::downlevel_defaults()
|
||||
}),
|
||||
)
|
||||
.run_async(|ctx| {
|
||||
shader_input_output_test(
|
||||
ctx,
|
||||
InputStorageType::PushConstant,
|
||||
create_64bit_struct_layout_tests(),
|
||||
)
|
||||
});
|
||||
fn f16asu16(f32: f32) -> u16 {
|
||||
half::f16::from_f32(f32).to_bits()
|
||||
}
|
||||
|
||||
// 16 bit alignment tests
|
||||
{
|
||||
let members =
|
||||
"scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32";
|
||||
let direct = String::from(
|
||||
"\
|
||||
output[0] = u32(input.scalar1);
|
||||
output[1] = u32(input.scalar2);
|
||||
output[2] = u32(input.v3.x);
|
||||
output[3] = u32(input.v3.y);
|
||||
output[4] = u32(input.v3.z);
|
||||
output[5] = u32(input.tuck_in);
|
||||
output[6] = u32(input.scalar4);
|
||||
output[7] = u32(extractBits(input.larger, 0u, 16u));
|
||||
output[8] = u32(extractBits(input.larger, 16u, 16u));
|
||||
",
|
||||
);
|
||||
|
||||
tests.push(ShaderTest::new(
|
||||
"f16 alignment".into(),
|
||||
members.into(),
|
||||
direct,
|
||||
&[
|
||||
f16asu16(0.0),
|
||||
f16asu16(1.0),
|
||||
f16asu16(2.0),
|
||||
f16asu16(3.0),
|
||||
f16asu16(4.0),
|
||||
f16asu16(5.0),
|
||||
f16asu16(6.0),
|
||||
f16asu16(7.0),
|
||||
f16asu16(8.0),
|
||||
f16asu16(9.0),
|
||||
10_u16,
|
||||
11_u16,
|
||||
// Some extra values to help debug if the test fails.
|
||||
12_u16,
|
||||
13_u16,
|
||||
14_u16,
|
||||
15_u16,
|
||||
16_u16,
|
||||
17_u16,
|
||||
18_u16,
|
||||
19_u16,
|
||||
20_u16,
|
||||
],
|
||||
&[
|
||||
0, // scalar1
|
||||
1, // scalar2
|
||||
4, 5, 6, // v3
|
||||
7, // tuck_in
|
||||
8, // scalar4
|
||||
10, // larger[0..16]
|
||||
11, // larger[16..32]
|
||||
],
|
||||
));
|
||||
}
|
||||
|
||||
// Matrix tests
|
||||
{
|
||||
let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h";
|
||||
let direct = String::from(
|
||||
"\
|
||||
output[0] = u32(input.m2[0].x);
|
||||
output[1] = u32(input.m2[0].y);
|
||||
output[2] = u32(input.m2[1].x);
|
||||
output[3] = u32(input.m2[1].y);
|
||||
|
||||
output[4] = u32(input.m3[0].x);
|
||||
output[5] = u32(input.m3[0].y);
|
||||
output[6] = u32(input.m3[0].z);
|
||||
output[7] = u32(input.m3[1].x);
|
||||
output[8] = u32(input.m3[1].y);
|
||||
output[9] = u32(input.m3[1].z);
|
||||
output[10] = u32(input.m3[2].x);
|
||||
output[11] = u32(input.m3[2].y);
|
||||
output[12] = u32(input.m3[2].z);
|
||||
|
||||
output[13] = u32(input.m4[0].x);
|
||||
output[14] = u32(input.m4[0].y);
|
||||
output[15] = u32(input.m4[0].z);
|
||||
output[16] = u32(input.m4[0].w);
|
||||
output[17] = u32(input.m4[1].x);
|
||||
output[18] = u32(input.m4[1].y);
|
||||
output[19] = u32(input.m4[1].z);
|
||||
output[20] = u32(input.m4[1].w);
|
||||
output[21] = u32(input.m4[2].x);
|
||||
output[22] = u32(input.m4[2].y);
|
||||
output[23] = u32(input.m4[2].z);
|
||||
output[24] = u32(input.m4[2].w);
|
||||
output[25] = u32(input.m4[3].x);
|
||||
output[26] = u32(input.m4[3].y);
|
||||
output[27] = u32(input.m4[3].z);
|
||||
output[28] = u32(input.m4[3].w);
|
||||
",
|
||||
);
|
||||
|
||||
tests.push(ShaderTest::new(
|
||||
"f16 matrix alignment".into(),
|
||||
members.into(),
|
||||
direct,
|
||||
&(0..32).map(|x| f16asu16(x as f32)).collect::<Vec<_>>(),
|
||||
&[
|
||||
0, 1, // m2[0]
|
||||
2, 3, // m2[1]
|
||||
//
|
||||
4, 5, 6, // m3[0]
|
||||
8, 9, 10, // m3[1]
|
||||
12, 13, 14, // m3[2]
|
||||
//
|
||||
16, 17, 18, 19, // m4[0]
|
||||
20, 21, 22, 23, // m4[1]
|
||||
24, 25, 26, 27, // m4[2]
|
||||
28, 29, 30, 31, // m4[3]
|
||||
],
|
||||
));
|
||||
}
|
||||
|
||||
// Insert `enable f16;` header
|
||||
tests
|
||||
.into_iter()
|
||||
.map(|test| test.header("enable f16;".into()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -383,6 +383,10 @@ pub fn create_validator(
|
||||
features.contains(wgt::Features::PUSH_CONSTANTS),
|
||||
);
|
||||
caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
|
||||
caps.set(
|
||||
Caps::SHADER_FLOAT16,
|
||||
features.contains(wgt::Features::SHADER_F16),
|
||||
);
|
||||
caps.set(
|
||||
Caps::PRIMITIVE_INDEX,
|
||||
features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
|
||||
|
||||
@@ -425,6 +425,23 @@ impl super::Adapter {
|
||||
&& features1.Int64ShaderOps.as_bool(),
|
||||
);
|
||||
|
||||
let float16_supported = {
|
||||
let mut features4 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4::default();
|
||||
let hr = unsafe {
|
||||
device.CheckFeatureSupport(
|
||||
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax
|
||||
ptr::from_mut(&mut features4).cast(),
|
||||
size_of::<Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4>() as _,
|
||||
)
|
||||
};
|
||||
hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool()
|
||||
};
|
||||
|
||||
features.set(
|
||||
wgt::Features::SHADER_F16,
|
||||
shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported,
|
||||
);
|
||||
|
||||
features.set(
|
||||
wgt::Features::TEXTURE_INT64_ATOMIC,
|
||||
shader_model >= naga::back::hlsl::ShaderModel::V6_6
|
||||
@@ -612,7 +629,7 @@ impl crate::Adapter for super::Adapter {
|
||||
|
||||
unsafe fn open(
|
||||
&self,
|
||||
_features: wgt::Features,
|
||||
features: wgt::Features,
|
||||
limits: &wgt::Limits,
|
||||
memory_hints: &wgt::MemoryHints,
|
||||
) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
|
||||
@@ -633,6 +650,7 @@ impl crate::Adapter for super::Adapter {
|
||||
let device = super::Device::new(
|
||||
self.device.clone(),
|
||||
queue.clone(),
|
||||
features,
|
||||
limits,
|
||||
memory_hints,
|
||||
self.private_caps,
|
||||
|
||||
@@ -33,9 +33,11 @@ use crate::{
|
||||
const NAGA_LOCATION_SEMANTIC: &[u8] = c"LOC".to_bytes();
|
||||
|
||||
impl super::Device {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn new(
|
||||
raw: Direct3D12::ID3D12Device,
|
||||
present_queue: Direct3D12::ID3D12CommandQueue,
|
||||
features: wgt::Features,
|
||||
limits: &wgt::Limits,
|
||||
memory_hints: &wgt::MemoryHints,
|
||||
private_caps: super::PrivateCapabilities,
|
||||
@@ -178,6 +180,7 @@ impl super::Device {
|
||||
event: Event::create(false, false)?,
|
||||
},
|
||||
private_caps,
|
||||
features,
|
||||
shared: Arc::new(shared),
|
||||
rtv_pool: Mutex::new(rtv_pool),
|
||||
dsv_pool: Mutex::new(descriptor::CpuPool::new(
|
||||
|
||||
@@ -634,6 +634,7 @@ pub struct Device {
|
||||
present_queue: Direct3D12::ID3D12CommandQueue,
|
||||
idler: Idler,
|
||||
private_caps: PrivateCapabilities,
|
||||
features: wgt::Features,
|
||||
shared: Arc<DeviceShared>,
|
||||
// CPU only pools
|
||||
rtv_pool: Mutex<descriptor::CpuPool>,
|
||||
|
||||
@@ -268,7 +268,7 @@ pub(super) fn compile_dxc(
|
||||
let raw_ep = OPCWSTR::new(raw_ep);
|
||||
let full_stage = OPCWSTR::new(full_stage);
|
||||
|
||||
let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 12>::new_const();
|
||||
let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 13>::new_const();
|
||||
|
||||
if let Some(source_name) = source_name.as_ref() {
|
||||
compile_args.push(source_name.ptr())
|
||||
@@ -298,6 +298,10 @@ pub(super) fn compile_dxc(
|
||||
compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS);
|
||||
}
|
||||
|
||||
if device.features.contains(wgt::Features::SHADER_F16) {
|
||||
compile_args.push(windows::core::w!("-enable-16bit-types"));
|
||||
}
|
||||
|
||||
let buffer = Dxc::DxcBuffer {
|
||||
Ptr: source.as_ptr().cast(),
|
||||
Size: source.len(),
|
||||
|
||||
@@ -399,6 +399,7 @@ impl PhysicalDeviceFeatures {
|
||||
vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
|
||||
vk::PhysicalDevice16BitStorageFeatures::default()
|
||||
.storage_buffer16_bit_access(true)
|
||||
.storage_input_output16(true)
|
||||
.uniform_and_storage_buffer16_bit_access(true),
|
||||
))
|
||||
} else {
|
||||
@@ -720,7 +721,8 @@ impl PhysicalDeviceFeatures {
|
||||
F::SHADER_F16,
|
||||
f16_i8.shader_float16 != 0
|
||||
&& bit16.storage_buffer16_bit_access != 0
|
||||
&& bit16.uniform_and_storage_buffer16_bit_access != 0,
|
||||
&& bit16.uniform_and_storage_buffer16_bit_access != 0
|
||||
&& bit16.storage_input_output16 != 0,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1968,6 +1970,10 @@ impl super::Adapter {
|
||||
capabilities.push(spv::Capability::Int64);
|
||||
}
|
||||
|
||||
if features.contains(wgt::Features::SHADER_F16) {
|
||||
capabilities.push(spv::Capability::Float16);
|
||||
}
|
||||
|
||||
if features.intersects(
|
||||
wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
|
||||
| wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX
|
||||
|
||||
@@ -1340,18 +1340,20 @@ bitflags_array! {
|
||||
/// This is a web and native feature.
|
||||
const INDIRECT_FIRST_INSTANCE = WEBGPU_FEATURE_INDIRECT_FIRST_INSTANCE;
|
||||
|
||||
/// Allows shaders to acquire the FP16 ability
|
||||
/// Allows shaders to use 16-bit floating point types. You may use them uniform buffers,
|
||||
/// storage buffers, and local variables. You may not use them in push constants.
|
||||
///
|
||||
/// Note: this is not supported in `naga` yet, only through `spirv-passthrough` right now.
|
||||
/// In order to use this in WGSL shaders, you must add `enable f16;` to the top of your shader,
|
||||
/// before any global items.
|
||||
///
|
||||
/// Supported Platforms:
|
||||
/// - Vulkan
|
||||
/// - Metal
|
||||
/// - DX12
|
||||
///
|
||||
/// This is a web and native feature.
|
||||
const SHADER_F16 = WEBGPU_FEATURE_SHADER_F16;
|
||||
|
||||
|
||||
/// Allows for usage of textures of format [`TextureFormat::Rg11b10Ufloat`] as a render target
|
||||
///
|
||||
/// Supported platforms:
|
||||
|
||||
Reference in New Issue
Block a user