feat: implement F16 support in shaders (#5701)

Co-authored-by: FL33TW00D <fleetwoodpersonal@gmail.com>
Co-authored-by: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Co-authored-by: ErichDonGubler <erichdongubler@gmail.com>
This commit is contained in:
Christopher Fleetwood
2025-03-19 16:26:50 +00:00
committed by GitHub
parent b912232188
commit c6286791fe
62 changed files with 3743 additions and 998 deletions

14
Cargo.lock generated
View File

@@ -1785,8 +1785,12 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1"
dependencies = [
"arbitrary",
"bytemuck",
"cfg-if",
"crunchy",
"num-traits",
"serde",
]
[[package]]
@@ -2193,6 +2197,12 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libredox"
version = "0.1.3"
@@ -2393,12 +2403,14 @@ dependencies = [
"codespan-reporting",
"diff",
"env_logger",
"half",
"hashbrown",
"hexf-parse",
"hlsl-snapshots",
"indexmap",
"itertools 0.13.0",
"log",
"num-traits",
"petgraph 0.7.1",
"pp-rs",
"ron",
@@ -2574,6 +2586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
"libm",
]
[[package]]
@@ -4843,6 +4856,7 @@ dependencies = [
"env_logger",
"futures-lite",
"glam",
"half",
"image",
"itertools 0.13.0",
"js-sys",

View File

@@ -108,6 +108,7 @@ fern = "0.7"
flume = "0.11"
futures-lite = "2"
glam = "0.29"
half = "2.5" # We require 2.5 to have `Arbitrary` support.
hashbrown = { version = "0.14.5", default-features = false, features = [
"ahash",
"inline-more",

View File

@@ -41,10 +41,17 @@ msl-out = []
## If you want to enable MSL output regardless of the target platform, use `naga/msl-out`.
msl-out-if-target-apple = []
serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"]
serialize = [
"dep:serde",
"bitflags/serde",
"half/serde",
"hashbrown/serde",
"indexmap/serde",
]
deserialize = [
"dep:serde",
"bitflags/serde",
"half/serde",
"hashbrown/serde",
"indexmap/serde",
]
@@ -84,9 +91,12 @@ termcolor = { version = "1.4.1" }
# https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
codespan-reporting = { version = "0.11.0" }
hashbrown.workspace = true
half = { workspace = true, features = ["arbitrary", "num-traits"] }
rustc-hash.workspace = true
indexmap.workspace = true
log = "0.4"
# `half` requires 0.2.16 for `FromBytes` and `ToBytes`.
num-traits = "0.2.16"
strum = { workspace = true, optional = true }
spirv = { version = "0.3", optional = true }
thiserror.workspace = true

View File

@@ -2703,6 +2703,9 @@ impl<'a, W: Write> Writer<'a, W> {
// decimal part even if it's zero, which is needed for a valid GLSL float constant
crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?,
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
crate::Literal::F16(_) => {
return Err(Error::Custom("GLSL has no 16-bit float type".into()));
}
// Unsigned integers need a `u` at the end
//
// While `core` doesn't necessarily need it, it's allowed and since `es` needs it we

View File

@@ -2628,6 +2628,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
// decimal part even if it's zero
crate::Literal::F64(value) => write!(self.out, "{value:?}L")?,
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
crate::Literal::F16(value) => write!(self.out, "{value:?}h")?,
crate::Literal::U32(value) => write!(self.out, "{value}u")?,
// HLSL has no suffix for explicit i32 literals, but not using any suffix
// makes the type ambiguous which prevents overload resolution from

View File

@@ -8,6 +8,9 @@ use core::{
fmt::{Display, Error as FmtError, Formatter, Write},
iter,
};
use num_traits::real::Real as _;
use half::f16;
use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo};
use crate::{
@@ -182,9 +185,11 @@ impl Display for TypeContext<'_> {
write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
}
crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
crate::TypeInner::Matrix { columns, rows, .. } => {
put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
}
crate::TypeInner::Matrix {
columns,
rows,
scalar,
} => put_numeric_type(out, scalar, &[rows, columns]),
crate::TypeInner::Pointer { base, space } => {
let sub = Self {
handle: base,
@@ -425,8 +430,12 @@ impl crate::Scalar {
match self {
Self {
kind: Sk::Float,
width: _,
width: 4,
} => "float",
Self {
kind: Sk::Float,
width: 2,
} => "half",
Self {
kind: Sk::Sint,
width: 4,
@@ -483,7 +492,7 @@ fn should_pack_struct_member(
match *ty_inner {
crate::TypeInner::Vector {
size: crate::VectorSize::Tri,
scalar: scalar @ crate::Scalar { width: 4, .. },
scalar: scalar @ crate::Scalar { width: 4 | 2, .. },
} if is_tight => Some(scalar),
_ => None,
}
@@ -1459,6 +1468,21 @@ impl<W: Write> Writer<W> {
crate::Literal::F64(_) => {
return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
}
crate::Literal::F16(value) => {
if value.is_infinite() {
let sign = if value.is_sign_negative() { "-" } else { "" };
write!(self.out, "{sign}INFINITY")?;
} else if value.is_nan() {
write!(self.out, "NAN")?;
} else {
let suffix = if value.fract() == f16::from_f32(0.0) {
".0h"
} else {
"h"
};
write!(self.out, "{value}{suffix}")?;
}
}
crate::Literal::F32(value) => {
if value.is_infinite() {
let sign = if value.is_sign_negative() { "-" } else { "" };

View File

@@ -4,6 +4,7 @@ Implementations for `BlockContext` methods.
use alloc::vec::Vec;
use arrayvec::ArrayVec;
use spirv::Word;
use super::{
@@ -1612,159 +1613,7 @@ impl BlockContext<'_> {
expr,
kind,
convert,
} => {
use crate::ScalarKind as Sk;
let expr_id = self.cached[expr];
let (src_scalar, src_size, is_matrix) =
match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) {
crate::TypeInner::Scalar(scalar) => (scalar, None, false),
crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false),
crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true),
ref other => {
log::error!("As source {:?}", other);
return Err(Error::Validation("Unexpected Expression::As source"));
}
};
enum Cast {
Identity,
Unary(spirv::Op),
Binary(spirv::Op, Word),
Ternary(spirv::Op, Word, Word),
}
let cast = if is_matrix {
// we only support identity casts for matrices
Cast::Unary(spirv::Op::CopyObject)
} else {
match (src_scalar.kind, kind, convert) {
// Filter out identity casts. Some Adreno drivers are
// confused by no-op OpBitCast instructions.
(src_kind, kind, convert)
if src_kind == kind
&& convert.filter(|&width| width != src_scalar.width).is_none() =>
{
Cast::Identity
}
(Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
(_, _, None) => Cast::Unary(spirv::Op::Bitcast),
// casting to a bool - generate `OpXxxNotEqual`
(_, Sk::Bool, Some(_)) => {
let op = match src_scalar.kind {
Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
Sk::Float => spirv::Op::FUnordNotEqual,
Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
};
let zero_scalar_id =
self.writer.get_constant_scalar_with(0, src_scalar)?;
let zero_id = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: src_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
self.writer.get_constant_composite(ty, &self.temp_list)
}
None => zero_scalar_id,
};
Cast::Binary(op, zero_id)
}
// casting from a bool - generate `OpSelect`
(Sk::Bool, _, Some(dst_width)) => {
let dst_scalar = crate::Scalar {
kind,
width: dst_width,
};
let zero_scalar_id =
self.writer.get_constant_scalar_with(0, dst_scalar)?;
let one_scalar_id =
self.writer.get_constant_scalar_with(1, dst_scalar)?;
let (accept_id, reject_id) = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: dst_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
let vec0_id =
self.writer.get_constant_composite(ty, &self.temp_list);
self.temp_list.fill(one_scalar_id);
let vec1_id =
self.writer.get_constant_composite(ty, &self.temp_list);
(vec1_id, vec0_id)
}
None => (one_scalar_id, zero_scalar_id),
};
Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
}
(Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
(Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
(Sk::Float, Sk::Float, Some(dst_width))
if src_scalar.width != dst_width =>
{
Cast::Unary(spirv::Op::FConvert)
}
(Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
(Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
// We assume it's either an identity cast, or int-uint.
_ => Cast::Unary(spirv::Op::Bitcast),
}
};
let id = self.gen_id();
let instruction = match cast {
Cast::Identity => None,
Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)),
Cast::Binary(op, operand) => Some(Instruction::binary(
op,
result_type_id,
id,
expr_id,
operand,
)),
Cast::Ternary(op, op1, op2) => Some(Instruction::ternary(
op,
result_type_id,
id,
expr_id,
op1,
op2,
)),
};
if let Some(instruction) = instruction {
block.body.push(instruction);
id
} else {
expr_id
}
}
} => self.write_as_expression(expr, convert, kind, block, result_type_id)?,
crate::Expression::ImageLoad {
image,
coordinate,
@@ -1925,6 +1774,237 @@ impl BlockContext<'_> {
Ok(())
}
/// Helper which focuses on generating the `As` expressions and the various conversions
/// that need to happen because of that.
///
/// `expr` is the (already cached) source expression of the cast, `kind` is the
/// target scalar kind, and `convert` is the optional target byte width — `None`
/// means a bitcast rather than a value conversion. Any SPIR-V instructions
/// needed are appended to `block`, and the id of the resulting value is
/// returned (which is the source id itself for identity casts).
fn write_as_expression(
&mut self,
expr: Handle<crate::Expression>,
convert: Option<u8>,
kind: crate::ScalarKind,
block: &mut Block,
result_type_id: u32,
) -> Result<u32, Error> {
use crate::ScalarKind as Sk;
let expr_id = self.cached[expr];
let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types);
// Matrix casts need special treatment in SPIR-V, as the cast functions
// can take vectors or scalars, but not matrices. In order to cast a matrix
// we need to cast each column of the matrix individually and construct a new
// matrix from the converted columns.
if let crate::TypeInner::Matrix {
columns,
rows,
scalar,
} = *ty
{
let Some(convert) = convert else {
// No conversion needs to be done, passes through.
return Ok(expr_id);
};
if convert == scalar.width {
// No conversion needs to be done, passes through.
return Ok(expr_id);
}
if kind != Sk::Float {
// Only float conversions are supported for matrices.
return Err(Error::Validation("Matrices must be floats"));
}
// Type of each extracted column
let column_src_ty =
self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
size: rows,
scalar,
})));
// Type of the column after conversion
let column_dst_ty =
self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
size: rows,
scalar: crate::Scalar {
kind,
width: convert,
},
})));
// Extract each column, convert it with `OpFConvert`, and collect the
// converted column ids so the matrix can be rebuilt afterwards.
let mut components = ArrayVec::<Word, 4>::new();
for column in 0..columns as usize {
let column_id = self.gen_id();
block.body.push(Instruction::composite_extract(
column_src_ty,
column_id,
expr_id,
&[column as u32],
));
let column_conv_id = self.gen_id();
block.body.push(Instruction::unary(
spirv::Op::FConvert,
column_dst_ty,
column_conv_id,
column_id,
));
components.push(column_conv_id);
}
let construct_id = self.gen_id();
block.body.push(Instruction::composite_construct(
result_type_id,
construct_id,
&components,
));
return Ok(construct_id);
}
// Scalar and vector sources from here on; anything else is invalid IR.
let (src_scalar, src_size) = match *ty {
crate::TypeInner::Scalar(scalar) => (scalar, None),
crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)),
ref other => {
log::error!("As source {:?}", other);
return Err(Error::Validation("Unexpected Expression::As source"));
}
};
// Shape of the instruction we need to emit for this cast, classified by
// its operand count (identity casts emit nothing at all).
enum Cast {
Identity,
Unary(spirv::Op),
Binary(spirv::Op, Word),
Ternary(spirv::Op, Word, Word),
}
let cast = match (src_scalar.kind, kind, convert) {
// Filter out identity casts. Some Adreno drivers are
// confused by no-op OpBitCast instructions.
(src_kind, kind, convert)
if src_kind == kind
&& convert.filter(|&width| width != src_scalar.width).is_none() =>
{
Cast::Identity
}
(Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
(_, _, None) => Cast::Unary(spirv::Op::Bitcast),
// casting to a bool - generate `OpXxxNotEqual`
(_, Sk::Bool, Some(_)) => {
let op = match src_scalar.kind {
Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
Sk::Float => spirv::Op::FUnordNotEqual,
Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
};
// Compare against a zero constant (splatted to a vector when the
// source is a vector) of the *source* type.
let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?;
let zero_id = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: src_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
self.writer.get_constant_composite(ty, &self.temp_list)
}
None => zero_scalar_id,
};
Cast::Binary(op, zero_id)
}
// casting from a bool - generate `OpSelect`
(Sk::Bool, _, Some(dst_width)) => {
let dst_scalar = crate::Scalar {
kind,
width: dst_width,
};
// `OpSelect` picks 1 when the bool is true and 0 otherwise, with
// the constants built in the *destination* type.
let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?;
let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?;
let (accept_id, reject_id) = match src_size {
Some(size) => {
let ty = LocalType::Numeric(NumericType::Vector {
size,
scalar: dst_scalar,
})
.into();
self.temp_list.clear();
self.temp_list.resize(size as _, zero_scalar_id);
let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list);
self.temp_list.fill(one_scalar_id);
let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list);
(vec1_id, vec0_id)
}
None => (one_scalar_id, zero_scalar_id),
};
Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
}
(Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
(Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
(Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::FConvert)
}
(Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
(Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
// We assume it's either an identity cast, or int-uint.
_ => Cast::Unary(spirv::Op::Bitcast),
};
// Emit the instruction (if any) chosen above and hand back the id that
// holds the converted value.
Ok(match cast {
Cast::Identity => expr_id,
Cast::Unary(op) => {
let id = self.gen_id();
block
.body
.push(Instruction::unary(op, result_type_id, id, expr_id));
id
}
Cast::Binary(op, operand) => {
let id = self.gen_id();
block.body.push(Instruction::binary(
op,
result_type_id,
id,
expr_id,
operand,
));
id
}
Cast::Ternary(op, op1, op2) => {
let id = self.gen_id();
block.body.push(Instruction::ternary(
op,
result_type_id,
id,
expr_id,
op1,
op2,
));
id
}
})
}
/// Build an `OpAccessChain` instruction.
///
/// Emit any needed bounds-checking expressions to `block`.

View File

@@ -405,6 +405,10 @@ impl super::Instruction {
instruction
}
/// Builds an `OpConstant` for a 16-bit value.
///
/// Per the SPIR-V spec, a literal narrower than 32 bits occupies the
/// low-order bits of a single operand word, so `low` is passed through as
/// the one-word literal payload.
pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self {
Self::constant(result_type_id, id, &[low])
}
pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self {
Self::constant(result_type_id, id, &[value])
}

View File

@@ -1154,6 +1154,15 @@ impl Writer {
if bits == 64 {
self.capabilities_used.insert(spirv::Capability::Float64);
}
if bits == 16 {
self.capabilities_used.insert(spirv::Capability::Float16);
self.capabilities_used
.insert(spirv::Capability::StorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::StorageInputOutput16);
}
Instruction::type_float(id, bits)
}
Sk::Bool => Instruction::type_bool(id),
@@ -1222,6 +1231,19 @@ impl Writer {
)?;
self.use_extension("SPV_EXT_shader_atomic_float_add");
}
// 16 bit floating-point support requires Float16 capability
crate::TypeInner::Matrix {
scalar: crate::Scalar::F16,
..
}
| crate::TypeInner::Vector {
scalar: crate::Scalar::F16,
..
}
| crate::TypeInner::Scalar(crate::Scalar::F16) => {
self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?;
self.use_extension("SPV_KHR_16bit_storage");
}
_ => {}
}
Ok(())
@@ -1498,6 +1520,10 @@ impl Writer {
Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32)
}
crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
crate::Literal::F16(value) => {
let low = value.to_bits();
Instruction::constant_16bit(type_id, id, low as u32)
}
crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
crate::Literal::U64(value) => {

View File

@@ -5,6 +5,7 @@ use alloc::{
vec::Vec,
};
use core::fmt::Write;
use hashbrown::HashSet;
use super::Error;
use super::ToWgslIfImplemented as _;
@@ -129,6 +130,9 @@ impl<W: Write> Writer<W> {
// Write all needed directives.
self.write_enable_dual_source_blending_if_needed(module)?;
// Write all `enable` declarations
self.write_enable_declarations(module)?;
// Write all structs
for (handle, ty) in module.types.iter() {
if let TypeInner::Struct { ref members, .. } = ty.inner {
@@ -222,6 +226,41 @@ impl<W: Write> Writer<W> {
Ok(())
}
/// Helper method which writes all the `enable` declarations
/// needed for a module.
///
/// Scans the module's types and emits `enable f16;` (at most once) if any
/// scalar, vector, or matrix type uses the `f16` scalar, followed by a blank
/// line for readability when anything was written.
fn write_enable_declarations(&mut self, module: &Module) -> BackendResult {
    // There is currently only one declaration we can emit (`f16`), so a
    // plain flag is enough to guarantee it is written at most once; the
    // previous single-variant enum + HashSet added an allocation and hash
    // lookups for no benefit.
    let mut wrote_f16 = false;

    // Write all the `enable` declarations
    for (_, ty) in module.types.iter() {
        match ty.inner {
            TypeInner::Scalar(scalar)
            | TypeInner::Vector { scalar, .. }
            | TypeInner::Matrix { scalar, .. } => {
                if scalar == crate::Scalar::F16 && !wrote_f16 {
                    writeln!(self.out, "enable f16;")?;
                    wrote_f16 = true;
                }
            }
            _ => {}
        }
    }

    if wrote_f16 {
        // Empty line for readability
        writeln!(self.out)?;
    }

    Ok(())
}
/// Helper method used to write
/// [functions](https://gpuweb.github.io/gpuweb/wgsl/#functions)
///
@@ -1092,6 +1131,7 @@ impl<W: Write> Writer<W> {
match expressions[expr] {
Expression::Literal(literal) => match literal {
crate::Literal::F16(value) => write!(self.out, "{value}h")?,
crate::Literal::F32(value) => write!(self.out, "{value}f")?,
crate::Literal::U32(value) => write!(self.out, "{value}u")?,
crate::Literal::I32(value) => {

View File

@@ -270,6 +270,7 @@ impl TryToWgsl for crate::Scalar {
Some(match self {
Scalar::F64 => "f64",
Scalar::F32 => "f32",
Scalar::F16 => "f16",
Scalar::I32 => "i32",
Scalar::U32 => "u32",
Scalar::I64 => "i64",

View File

@@ -109,9 +109,15 @@ pub enum ErrorKind {
/// Unsupported matrix of the form matCx2
///
/// Our IR expects matrices of the form matCx2 to have a stride of 8; however,
/// matrices in the std140 layout have a stride of at least 16
#[error("unsupported matrix of the form matCx2 in std140 block layout")]
UnsupportedMatrixTypeInStd140,
/// matrices in the std140 layout have a stride of at least 16.
#[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 },
/// Unsupported matrix of the form f16matCxR
///
/// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count,
/// however matrices in the std140 layout have a stride of at least 16.
#[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 },
/// A variable with the same name already exists in the current scope.
#[error("Variable already declared: {0}")]
VariableAlreadyDeclared(String),

View File

@@ -122,11 +122,25 @@ pub fn calculate_offset(
}
// See comment on the error kind
if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi {
errors.push(Error {
kind: ErrorKind::UnsupportedMatrixTypeInStd140,
meta,
});
if StructLayout::Std140 == layout {
// Do the f16 test first, as it's more specific
if scalar == Scalar::F16 {
errors.push(Error {
kind: ErrorKind::UnsupportedF16MatrixInStd140 {
columns: columns as u8,
rows: rows as u8,
},
meta,
});
}
if rows == crate::VectorSize::Bi {
errors.push(Error {
kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 {
columns: columns as u8,
},
meta,
});
}
}
(align, align * columns as u32)

View File

@@ -12,6 +12,10 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
name: None,
inner: TypeInner::Scalar(Scalar::BOOL),
}),
"float16_t" => Some(Type {
name: None,
inner: TypeInner::Scalar(Scalar::F16),
}),
"float" => Some(Type {
name: None,
inner: TypeInner::Scalar(Scalar::F32),
@@ -42,6 +46,7 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
"i" => Scalar::I32,
"u" => Scalar::U32,
"d" => Scalar::F64,
"f16" => Scalar::F16,
_ => return None,
})
}

View File

@@ -39,6 +39,7 @@ use alloc::{borrow::ToOwned, format, string::String, vec, vec::Vec};
use core::{convert::TryInto, mem, num::NonZeroU32};
use std::path::PathBuf;
use half::f16;
use petgraph::graphmap::GraphMap;
use super::atomic_upgrade::Upgrades;
@@ -82,6 +83,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
"SPV_KHR_vulkan_memory_model",
"SPV_KHR_multiview",
"SPV_EXT_shader_atomic_float_add",
"SPV_KHR_16bit_storage",
];
pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
@@ -5604,6 +5606,9 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
}) => {
let low = self.next()?;
match width {
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal
// If a numeric type's bit width is less than 32 bits, the value appears in the low-order bits of the word.
2 => crate::Literal::F16(f16::from_bits(low as u16)),
4 => crate::Literal::F32(f32::from_bits(low)),
8 => {
inst.expect(5)?;

View File

@@ -153,8 +153,6 @@ pub enum NumberError {
Invalid,
#[error("numeric literal not representable by target type")]
NotRepresentable,
#[error("unimplemented f16 type")]
UnimplementedF16,
}
#[derive(Copy, Clone, Debug, PartialEq)]
@@ -1026,23 +1024,22 @@ impl<'a> Error<'a> {
)],
},
Error::EnableExtensionNotEnabled { kind, span } => ParseError {
message: format!("`{}` enable-extension is not enabled", kind.to_ident()),
message: format!("the `{}` enable extension is not enabled", kind.to_ident()),
labels: vec![(
span,
format!(
concat!(
"the `{}` enable-extension is needed for this functionality, ",
"but it is not currently enabled"
"the `{}` \"Enable Extension\" is needed for this functionality, ",
"but it is not currently enabled."
),
kind.to_ident()
)
.into(),
)],
#[allow(irrefutable_let_patterns)]
notes: if let EnableExtension::Unimplemented(kind) = kind {
vec![format!(
concat!(
"This enable-extension is not yet implemented. ",
"This \"Enable Extension\" is not yet implemented. ",
"Let Naga maintainers know that you ran into this at ",
"<https://github.com/gfx-rs/wgpu/issues/{}>, ",
"so they can prioritize it!"
@@ -1050,7 +1047,12 @@ impl<'a> Error<'a> {
kind.tracking_issue_num()
)]
} else {
vec![]
vec![
format!(
"You can enable this extension by adding `enable {};` at the top of the shader, before any other items.",
kind.to_ident()
),
]
},
},
Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError {

View File

@@ -2013,6 +2013,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
let expr: Typed<crate::Expression> = match *expr {
ast::Expression::Literal(literal) => {
let literal = match literal {
ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f),
ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),

View File

@@ -1,3 +1,6 @@
use crate::front::wgsl::parse::directive::enable_extension::{
EnableExtensions, ImplementedEnableExtension,
};
use crate::front::wgsl::{Error, Result, Scalar};
use crate::Span;
@@ -113,10 +116,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<'_, crate::StorageFo
})
}
pub fn get_scalar_type(word: &str) -> Option<Scalar> {
pub fn get_scalar_type(
enable_extensions: &EnableExtensions,
span: Span,
word: &str,
) -> Result<'static, Option<Scalar>> {
use crate::ScalarKind as Sk;
match word {
// "f16" => Some(Scalar { kind: Sk::Float, width: 2 }),
let scalar = match word {
"f16" => Some(Scalar {
kind: Sk::Float,
width: 2,
}),
"f32" => Some(Scalar {
kind: Sk::Float,
width: 4,
@@ -146,7 +156,18 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
width: crate::BOOL_WIDTH,
}),
_ => None,
};
if matches!(scalar, Some(Scalar::F16))
&& !enable_extensions.contains(ImplementedEnableExtension::F16)
{
return Err(Box::new(Error::EnableExtensionNotEnabled {
span,
kind: ImplementedEnableExtension::F16.into(),
}));
}
Ok(scalar)
}
pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> {

View File

@@ -11,19 +11,23 @@ use alloc::boxed::Box;
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EnableExtensions {
dual_source_blending: bool,
/// Whether `enable f16;` was written earlier in the shader module.
f16: bool,
}
impl EnableExtensions {
pub(crate) const fn empty() -> Self {
Self {
f16: false,
dual_source_blending: false,
}
}
/// Add an enable-extension to the set requested by a module.
pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) {
let field: &mut bool = match ext {
let field = match ext {
ImplementedEnableExtension::DualSourceBlending => &mut self.dual_source_blending,
ImplementedEnableExtension::F16 => &mut self.f16,
};
*field = true;
}
@@ -32,6 +36,7 @@ impl EnableExtensions {
pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool {
match ext {
ImplementedEnableExtension::DualSourceBlending => self.dual_source_blending,
ImplementedEnableExtension::F16 => self.f16,
}
}
}
@@ -47,7 +52,6 @@ impl Default for EnableExtensions {
/// WGSL spec.: <https://www.w3.org/TR/WGSL/#enable-extensions-sec>
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum EnableExtension {
#[allow(unused)]
Implemented(ImplementedEnableExtension),
Unimplemented(UnimplementedEnableExtension),
}
@@ -66,7 +70,7 @@ impl EnableExtension {
/// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible.
pub(crate) fn from_ident(word: &str, span: Span) -> Result<Self> {
Ok(match word {
Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16),
Self::F16 => Self::Implemented(ImplementedEnableExtension::F16),
Self::CLIP_DISTANCES => {
Self::Unimplemented(UnimplementedEnableExtension::ClipDistances)
}
@@ -82,10 +86,9 @@ impl EnableExtension {
match self {
Self::Implemented(kind) => match kind {
ImplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING,
ImplementedEnableExtension::F16 => Self::F16,
},
Self::Unimplemented(kind) => match kind {
UnimplementedEnableExtension::F16 => Self::F16,
UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES,
},
}
@@ -101,17 +104,17 @@ pub enum ImplementedEnableExtension {
///
/// [`enable dual_source_blending;`]: https://www.w3.org/TR/WGSL/#extension-dual_source_blending
DualSourceBlending,
}
/// A variant of [`EnableExtension::Unimplemented`].
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum UnimplementedEnableExtension {
/// Enables `f16`/`half` primitive support in all shader languages.
///
/// In the WGSL standard, this corresponds to [`enable f16;`].
///
/// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16
F16,
}
/// A variant of [`EnableExtension::Unimplemented`].
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum UnimplementedEnableExtension {
/// Enables the `clip_distances` variable in WGSL.
///
/// In the WGSL standard, this corresponds to [`enable clip_distances;`].
@@ -123,7 +126,6 @@ pub enum UnimplementedEnableExtension {
impl UnimplementedEnableExtension {
pub(crate) const fn tracking_issue_num(self) -> u16 {
match self {
Self::F16 => 4384,
Self::ClipDistances => 6236,
}
}

View File

@@ -413,14 +413,17 @@ impl<'a> Lexer<'a> {
/// Parses a generic scalar type, for example `<f32>`.
pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> {
self.expect_generic_paren('<')?;
let pair = match self.next() {
let (scalar, _span) = match self.next() {
(Token::Word(word), span) => {
conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
conv::get_scalar_type(&self.enable_extensions, span, word)?
.map(|scalar| (scalar, span))
.ok_or(Error::UnknownScalarType(span))?
}
(_, span) => Err(Error::UnknownScalarType(span)),
}?;
(_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
};
self.expect_generic_paren('>')?;
Ok(pair)
Ok(scalar)
}
/// Parses a generic scalar type, for example `<f32>`.
@@ -430,14 +433,18 @@ impl<'a> Lexer<'a> {
&mut self,
) -> Result<'a, (Scalar, Span)> {
self.expect_generic_paren('<')?;
let pair = match self.next() {
(Token::Word(word), span) => conv::get_scalar_type(word)
.map(|scalar| (scalar, span))
.ok_or(Error::UnknownScalarType(span)),
(_, span) => Err(Error::UnknownScalarType(span)),
}?;
let (scalar, span) = match self.next() {
(Token::Word(word), span) => {
conv::get_scalar_type(&self.enable_extensions, span, word)?
.map(|scalar| (scalar, span))
.ok_or(Error::UnknownScalarType(span))?
}
(_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
};
self.expect_generic_paren('>')?;
Ok(pair)
Ok((scalar, span))
}
pub(in crate::front::wgsl) fn next_storage_access(
@@ -518,6 +525,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) {
#[test]
fn test_numbers() {
use half::f16;
// WGSL spec examples //
// decimal integer
@@ -542,14 +550,16 @@ fn test_numbers() {
Token::Number(Ok(Number::AbstractFloat(0.01))),
Token::Number(Ok(Number::AbstractFloat(12.34))),
Token::Number(Ok(Number::F32(0.))),
Token::Number(Err(NumberError::UnimplementedF16)),
Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
Token::Number(Ok(Number::AbstractFloat(0.001))),
Token::Number(Ok(Number::AbstractFloat(43.75))),
Token::Number(Ok(Number::F32(16.))),
Token::Number(Ok(Number::AbstractFloat(0.1875))),
Token::Number(Err(NumberError::UnimplementedF16)),
// https://github.com/gfx-rs/wgpu/issues/7046
Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
Token::Number(Ok(Number::AbstractFloat(0.12109375))),
Token::Number(Err(NumberError::UnimplementedF16)),
// https://github.com/gfx-rs/wgpu/issues/7046
Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
],
);

View File

@@ -6,9 +6,7 @@ use crate::diagnostic_filter::{
ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule,
};
use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken};
use crate::front::wgsl::parse::directive::enable_extension::{
EnableExtension, EnableExtensions, UnimplementedEnableExtension,
};
use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions};
use crate::front::wgsl::parse::directive::language_extension::LanguageExtension;
use crate::front::wgsl::parse::directive::DirectiveKind;
use crate::front::wgsl::parse::lexer::{Lexer, Token};
@@ -364,7 +362,7 @@ impl Parser {
span: Span,
ctx: &mut ExpressionContext<'a, '_, '_>,
) -> Result<'a, Option<ast::ConstructorType<'a>>> {
if let Some(scalar) = conv::get_scalar_type(word) {
if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
return Ok(Some(ast::ConstructorType::Scalar(scalar)));
}
@@ -393,6 +391,13 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"vec2h" => {
return Ok(Some(ast::ConstructorType::Vector {
size: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"vec3" => ast::ConstructorType::PartialVector {
size: crate::VectorSize::Tri,
},
@@ -417,6 +422,13 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"vec3h" => {
return Ok(Some(ast::ConstructorType::Vector {
size: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"vec4" => ast::ConstructorType::PartialVector {
size: crate::VectorSize::Quad,
},
@@ -441,6 +453,13 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"vec4h" => {
return Ok(Some(ast::ConstructorType::Vector {
size: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat2x2" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Bi,
@@ -453,6 +472,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat2x2h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat2x3" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Tri,
@@ -465,6 +492,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat2x3h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat2x4" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Quad,
@@ -477,6 +512,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat2x4h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat3x2" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Bi,
@@ -489,6 +532,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat3x2h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat3x3" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Tri,
@@ -501,6 +552,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat3x3h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat3x4" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Quad,
@@ -513,6 +572,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat3x4h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat4x2" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Bi,
@@ -525,6 +592,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat4x2h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat4x3" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Tri,
@@ -537,6 +612,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat4x3h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"mat4x4" => ast::ConstructorType::PartialMatrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Quad,
@@ -549,6 +632,14 @@ impl Parser {
ty_span: Span::UNDEFINED,
}))
}
"mat4x4h" => {
return Ok(Some(ast::ConstructorType::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
}))
}
"array" => ast::ConstructorType::PartialArray,
"atomic"
| "binding_array"
@@ -744,15 +835,17 @@ impl Parser {
}
(Token::Number(res), span) => {
let _ = lexer.next();
let num = res.map_err(|err| match err {
super::error::NumberError::UnimplementedF16 => {
Error::EnableExtensionNotEnabled {
kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16),
let num = res.map_err(|err| Error::BadNumber(span, err))?;
if let Some(enable_extension) = num.requires_enable_extension() {
if !lexer.enable_extensions.contains(enable_extension) {
return Err(Box::new(Error::EnableExtensionNotEnabled {
kind: enable_extension.into(),
span,
}
}));
}
err => Error::BadNumber(span, err),
})?;
}
ast::Expression::Literal(ast::Literal::Number(num))
}
(Token::Word("RAY_FLAG_NONE"), _) => {
@@ -1340,9 +1433,10 @@ impl Parser {
&mut self,
lexer: &mut Lexer<'a>,
word: &'a str,
span: Span,
ctx: &mut ExpressionContext<'a, '_, '_>,
) -> Result<'a, Option<ast::Type<'a>>> {
if let Some(scalar) = conv::get_scalar_type(word) {
if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
return Ok(Some(ast::Type::Scalar(scalar)));
}
@@ -1370,6 +1464,11 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"vec2h" => ast::Type::Vector {
size: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"vec3" => {
let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
ast::Type::Vector {
@@ -1393,6 +1492,11 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"vec3h" => ast::Type::Vector {
size: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"vec4" => {
let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
ast::Type::Vector {
@@ -1416,6 +1520,11 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"vec4h" => ast::Type::Vector {
size: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat2x2" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)?
}
@@ -1425,6 +1534,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat2x2h" => ast::Type::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat2x3" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)?
}
@@ -1434,6 +1549,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat2x3h" => ast::Type::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat2x4" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)?
}
@@ -1443,6 +1564,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat2x4h" => ast::Type::Matrix {
columns: crate::VectorSize::Bi,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat3x2" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)?
}
@@ -1452,6 +1579,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat3x2h" => ast::Type::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat3x3" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)?
}
@@ -1461,6 +1594,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat3x3h" => ast::Type::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat3x4" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)?
}
@@ -1470,6 +1609,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat3x4h" => ast::Type::Matrix {
columns: crate::VectorSize::Tri,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat4x2" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)?
}
@@ -1479,6 +1624,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat4x2h" => ast::Type::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Bi,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat4x3" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)?
}
@@ -1488,6 +1639,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat4x3h" => ast::Type::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Tri,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"mat4x4" => {
self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)?
}
@@ -1497,6 +1654,12 @@ impl Parser {
ty: ctx.new_scalar(Scalar::F32),
ty_span: Span::UNDEFINED,
},
"mat4x4h" => ast::Type::Matrix {
columns: crate::VectorSize::Quad,
rows: crate::VectorSize::Quad,
ty: ctx.new_scalar(Scalar::F16),
ty_span: Span::UNDEFINED,
},
"atomic" => {
let scalar = lexer.next_scalar_generic()?;
ast::Type::Atomic(scalar)
@@ -1763,7 +1926,7 @@ impl Parser {
let (name, span) = lexer.next_ident_with_span()?;
let ty = match this.type_decl_impl(lexer, name, ctx)? {
let ty = match this.type_decl_impl(lexer, name, span, ctx)? {
Some(ty) => ty,
None => {
ctx.unresolved.insert(ast::Dependency {

View File

@@ -1,7 +1,9 @@
use alloc::format;
use crate::front::wgsl::error::NumberError;
use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension;
use crate::front::wgsl::parse::lexer::Token;
use half::f16;
/// When using this type assume no Abstract Int/Float for now
#[derive(Copy, Clone, Debug, PartialEq)]
@@ -18,12 +20,23 @@ pub enum Number {
I64(i64),
/// Concrete u64
U64(u64),
/// Concrete f16
F16(f16),
/// Concrete f32
F32(f32),
/// Concrete f64
F64(f64),
}
impl Number {
    /// Returns the WGSL enable extension that must be active for this
    /// numeric literal to be used, or `None` if no extension is required.
    pub(super) const fn requires_enable_extension(&self) -> Option<ImplementedEnableExtension> {
        // Only `f16` literals are gated behind an extension today.
        if let Number::F16(_) = *self {
            Some(ImplementedEnableExtension::F16)
        } else {
            None
        }
    }
}
pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) {
let (result, rest) = parse(input);
(Token::Number(result), rest)
@@ -369,7 +382,8 @@ fn parse_hex_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
// can only be ParseHexfErrorKind::Inexact but we can't check since it's private
_ => Err(NumberError::NotRepresentable),
},
Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
// TODO: f16 is not supported by hexf_parse
Some(FloatKind::F16) => Err(NumberError::NotRepresentable),
Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) {
Ok(num) => Ok(Number::F32(num)),
// can only be ParseHexfErrorKind::Inexact but we can't check since it's private
@@ -405,7 +419,12 @@ fn parse_dec_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
.then_some(Number::F64(num))
.ok_or(NumberError::NotRepresentable)
}
Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
Some(FloatKind::F16) => {
let num = input.parse::<f16>().unwrap(); // will never fail
num.is_finite()
.then_some(Number::F16(num))
.ok_or(NumberError::NotRepresentable)
}
}
}

View File

@@ -225,6 +225,7 @@ use alloc::{string::String, vec::Vec};
#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
use half::f16;
#[cfg(feature = "deserialize")]
use serde::Deserialize;
#[cfg(feature = "serialize")]
@@ -818,6 +819,8 @@ pub enum Literal {
F64(f64),
/// May not be NaN or infinity.
F32(f32),
/// May not be NaN or infinity.
F16(f16),
U32(u32),
I32(i32),
U64(u64),

View File

@@ -7,6 +7,8 @@ use alloc::{
use core::iter;
use arrayvec::ArrayVec;
use half::f16;
use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero};
use crate::{
arena::{Arena, Handle, HandleVec, UniqueArena},
@@ -208,6 +210,7 @@ gen_component_wise_extractor! {
literals: [
AbstractFloat => AbstractFloat: f64,
F32 => F32: f32,
F16 => F16: f16,
AbstractInt => AbstractInt: i64,
U32 => U32: u32,
I32 => I32: i32,
@@ -228,6 +231,7 @@ gen_component_wise_extractor! {
literals: [
AbstractFloat => Abstract: f64,
F32 => F32: f32,
F16 => F16: f16,
],
scalar_kinds: [
Float,
@@ -253,6 +257,7 @@ gen_component_wise_extractor! {
AbstractFloat => AbstractFloat: f64,
AbstractInt => AbstractInt: i64,
F32 => F32: f32,
F16 => F16: f16,
I32 => I32: i32,
],
scalar_kinds: [
@@ -1114,6 +1119,7 @@ impl<'a> ConstantEvaluator<'a> {
component_wise_scalar(self, span, [arg], |args| match args {
Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])),
Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])),
Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])),
Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
@@ -1145,9 +1151,13 @@ impl<'a> ConstantEvaluator<'a> {
}
)
}
crate::MathFunction::Saturate => {
component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) })
}
crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e {
Float::F16([e]) => Ok(Float::F16(
[e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))],
)),
Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])),
Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])),
}),
// trigonometry
crate::MathFunction::Cos => {
@@ -1201,7 +1211,35 @@ impl<'a> ConstantEvaluator<'a> {
component_wise_float!(self, span, [arg], |e| { Ok([e.floor()]) })
}
crate::MathFunction::Round => {
component_wise_float!(self, span, [arg], |e| { Ok([e.round_ties_even()]) })
component_wise_float(self, span, [arg], |e| match e {
Float::Abstract([e]) => Ok(Float::Abstract([e.round_ties_even()])),
Float::F32([e]) => Ok(Float::F32([e.round_ties_even()])),
Float::F16([e]) => {
// TODO: `round_ties_even` is not available on `half::f16` yet.
//
// This polyfill is shamelessly [~~stolen from~~ inspired by `ndarray-image`][polyfill source],
// which has licensing compatible with ours. See also
// <https://github.com/rust-lang/rust/issues/96710>.
//
// [polyfill source]: https://github.com/imeka/ndarray-ndimage/blob/8b14b4d6ecfbc96a8a052f802e342a7049c68d8f/src/lib.rs#L98
// Round half-to-even polyfill for `f64` (`round_ties_even` landed in std
// only recently; see <https://github.com/rust-lang/rust/issues/96710>).
// Values reaching this point come from `f16`, so the cast to `i64` below
// never saturates in practice.
fn round_ties_even(x: f64) -> f64 {
    let whole = x as i64;
    let frac = (x - whole as f64).abs();
    if frac != 0.5 {
        // Not a tie: plain nearest rounding gives the right answer.
        return x.round();
    }
    // Exact tie: step the magnitude toward the even neighbor, then
    // restore the original sign.
    let magnitude = if whole & 1 == 1 {
        x.abs() + 0.5 // odd whole part, e.g. -1.5, 1.5, 3.5, ...
    } else {
        x.abs() - 0.5 // even whole part, e.g. -2.5, 0.5, 2.5, ...
    };
    magnitude.copysign(x)
}
Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))]))
}
})
}
crate::MathFunction::Fract => {
component_wise_float!(self, span, [arg], |e| {
@@ -1246,15 +1284,27 @@ impl<'a> ConstantEvaluator<'a> {
)
}
crate::MathFunction::Step => {
component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| {
Ok([if edge <= x { 1.0 } else { 0.0 }])
component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x {
Float::Abstract([edge, x]) => {
Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }]))
}
Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])),
Float::F16([edge, x]) => Ok(Float::F16([if edge <= x {
f16::one()
} else {
f16::zero()
}])),
})
}
crate::MathFunction::Sqrt => {
component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) })
}
crate::MathFunction::InverseSqrt => {
component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) })
component_wise_float(self, span, [arg], |e| match e {
Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])),
Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])),
Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])),
})
}
// bits
@@ -1549,6 +1599,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => v,
Literal::U32(v) => v as i32,
Literal::F32(v) => v as i32,
Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf
Literal::Bool(v) => v as i32,
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
@@ -1560,6 +1611,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => v as u32,
Literal::U32(v) => v,
Literal::F32(v) => v as u32,
Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf
Literal::Bool(v) => v as u32,
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
@@ -1575,6 +1627,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F64(v) => v as i64,
Literal::I64(v) => v,
Literal::U64(v) => v as i64,
Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf
Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
}),
@@ -1586,9 +1639,22 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F64(v) => v as u64,
Literal::I64(v) => v as u64,
Literal::U64(v) => v,
Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf
Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
}),
Sc::F16 => Literal::F16(match literal {
Literal::F16(v) => v,
Literal::F32(v) => f16::from_f32(v),
Literal::F64(v) => f16::from_f64(v),
Literal::Bool(v) => f16::from_u32(v as u32).unwrap(),
Literal::I64(v) => f16::from_i64(v).unwrap(),
Literal::U64(v) => f16::from_u64(v).unwrap(),
Literal::I32(v) => f16::from_i32(v).unwrap(),
Literal::U32(v) => f16::from_u32(v).unwrap(),
Literal::AbstractFloat(v) => f16::try_from_abstract(v)?,
Literal::AbstractInt(v) => f16::try_from_abstract(v)?,
}),
Sc::F32 => Literal::F32(match literal {
Literal::I32(v) => v as f32,
Literal::U32(v) => v as f32,
@@ -1597,12 +1663,14 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
}
Literal::F16(v) => f16::to_f32(v),
Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
Literal::AbstractFloat(v) => f32::try_from_abstract(v)?,
}),
Sc::F64 => Literal::F64(match literal {
Literal::I32(v) => v as f64,
Literal::U32(v) => v as f64,
Literal::F16(v) => f16::to_f64(v),
Literal::F32(v) => v as f64,
Literal::F64(v) => v,
Literal::Bool(v) => v as u32 as f64,
@@ -1614,6 +1682,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => v != 0,
Literal::U32(v) => v != 0,
Literal::F32(v) => v != 0.0,
Literal::F16(v) => v != f16::zero(),
Literal::Bool(v) => v,
Literal::F64(_)
| Literal::I64(_)
@@ -1770,6 +1839,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::I32(v) => Literal::I32(v.wrapping_neg()),
Literal::I64(v) => Literal::I64(v.wrapping_neg()),
Literal::F32(v) => Literal::F32(-v),
Literal::F16(v) => Literal::F16(-v),
Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()),
Literal::AbstractFloat(v) => Literal::AbstractFloat(-v),
_ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg),
@@ -1918,6 +1988,14 @@ impl<'a> ConstantEvaluator<'a> {
_ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
})
}
(Literal::F16(a), Literal::F16(b)) => Literal::F16(match op {
BinaryOperator::Add => a + b,
BinaryOperator::Subtract => a - b,
BinaryOperator::Multiply => a * b,
BinaryOperator::Divide => a / b,
BinaryOperator::Modulo => a % b,
_ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
}),
(Literal::AbstractInt(a), Literal::AbstractInt(b)) => {
Literal::AbstractInt(match op {
BinaryOperator::Add => a.checked_add(b).ok_or_else(|| {
@@ -2522,6 +2600,32 @@ impl TryFromAbstract<f64> for u64 {
}
}
impl TryFromAbstract<f64> for f16 {
    /// Converts an abstract float to `f16`, rejecting values whose
    /// magnitude overflows to infinity in half precision.
    fn try_from_abstract(value: f64) -> Result<f16, ConstantEvaluatorError> {
        let converted = f16::from_f64(value);
        if converted.is_infinite() {
            Err(ConstantEvaluatorError::AutomaticConversionLossy {
                value: format!("{value:?}"),
                to_type: "f16",
            })
        } else {
            Ok(converted)
        }
    }
}
impl TryFromAbstract<i64> for f16 {
    /// Converts an abstract int to `f16`, failing when `f16::from_i64`
    /// reports the value as unconvertible (returns `None`).
    fn try_from_abstract(value: i64) -> Result<f16, ConstantEvaluatorError> {
        // `ok_or_else` replaces the previous `is_none()` check followed by
        // `unwrap()` (Clippy `unnecessary_unwrap`); behavior is unchanged.
        f16::from_i64(value).ok_or_else(|| ConstantEvaluatorError::AutomaticConversionLossy {
            value: format!("{value:?}"),
            to_type: "f16",
        })
    }
}
#[cfg(test)]
mod tests {
use alloc::{vec, vec::Vec};

View File

@@ -80,6 +80,7 @@ impl From<super::StorageFormat> for super::Scalar {
pub enum HashableLiteral {
F64(u64),
F32(u32),
F16(u16),
U32(u32),
I32(i32),
U64(u64),
@@ -94,6 +95,7 @@ impl From<crate::Literal> for HashableLiteral {
match l {
crate::Literal::F64(v) => Self::F64(v.to_bits()),
crate::Literal::F32(v) => Self::F32(v.to_bits()),
crate::Literal::F16(v) => Self::F16(v.to_bits()),
crate::Literal::U32(v) => Self::U32(v),
crate::Literal::I32(v) => Self::I32(v),
crate::Literal::U64(v) => Self::U64(v),
@@ -132,6 +134,7 @@ impl crate::Literal {
match *self {
Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
Self::F16(_) => 2,
Self::Bool(_) => crate::BOOL_WIDTH,
Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
}
@@ -140,6 +143,7 @@ impl crate::Literal {
match *self {
Self::F64(_) => crate::Scalar::F64,
Self::F32(_) => crate::Scalar::F32,
Self::F16(_) => crate::Scalar::F16,
Self::U32(_) => crate::Scalar::U32,
Self::I32(_) => crate::Scalar::I32,
Self::U64(_) => crate::Scalar::U64,

View File

@@ -20,6 +20,10 @@ impl crate::ScalarKind {
}
impl crate::Scalar {
pub const F16: Self = Self {
kind: crate::ScalarKind::Float,
width: 2,
};
pub const I32: Self = Self {
kind: crate::ScalarKind::Sint,
width: 4,

View File

@@ -1794,7 +1794,7 @@ impl super::Validator {
}
pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> {
self.check_width(literal.scalar())?;
let _ = self.check_width(literal.scalar())?;
check_literal_value(literal)?;
Ok(())

View File

@@ -4,7 +4,7 @@ use bit_set::BitSet;
use super::{
analyzer::{FunctionInfo, GlobalUse},
Capabilities, Disalignment, FunctionError, ModuleInfo,
Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError,
};
use crate::arena::{Handle, UniqueArena};
use crate::span::{AddSpan as _, MapErrWithSpan as _, SpanProvider as _, WithSpan};
@@ -41,6 +41,8 @@ pub enum GlobalVariableError {
InitializerNotAllowed(crate::AddressSpace),
#[error("Storage address space doesn't support write-only access")]
StorageAddressSpaceWriteOnlyNotSupported,
#[error("Type is not valid for use as a push constant")]
InvalidPushConstantType(#[source] PushConstantError),
}
#[derive(Clone, Debug, thiserror::Error)]
@@ -596,6 +598,9 @@ impl super::Validator {
Capabilities::PUSH_CONSTANT,
));
}
if let Err(ref err) = type_info.push_constant_compatibility {
return Err(GlobalVariableError::InvalidPushConstantType(err.clone()));
}
(
TypeFlags::DATA
| TypeFlags::COPY

View File

@@ -31,7 +31,7 @@ pub use expression::{check_literal_value, LiteralError};
pub use expression::{ConstExpressionError, ExpressionError};
pub use function::{CallError, FunctionError, LocalVariableError};
pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError};
use self::handles::InvalidHandleError;
@@ -163,6 +163,8 @@ bitflags::bitflags! {
const TEXTURE_INT64_ATOMIC = 1 << 24;
/// Support for ray queries returning vertex position
const RAY_HIT_VERTEX_POSITION = 1 << 25;
/// Support for 16-bit floating-point types.
const SHADER_FLOAT16 = 1 << 26;
}
}

View File

@@ -169,8 +169,16 @@ pub enum WidthError {
Abstract,
}
/// Reasons a type may not be used in the push-constant address space.
#[derive(Clone, Debug, thiserror::Error)]
#[cfg_attr(test, derive(PartialEq))]
pub enum PushConstantError {
    #[error("The scalar type {0:?} is not supported in push constants")]
    InvalidScalar(crate::Scalar),
}
// Only makes sense if `flags.contains(HOST_SHAREABLE)`
type LayoutCompatibility = Result<Alignment, (Handle<crate::Type>, Disalignment)>;
type PushConstantCompatibility = Result<(), PushConstantError>;
fn check_member_layout(
accum: &mut LayoutCompatibility,
@@ -221,6 +229,7 @@ pub(super) struct TypeInfo {
pub flags: TypeFlags,
pub uniform_layout: LayoutCompatibility,
pub storage_layout: LayoutCompatibility,
pub push_constant_compatibility: PushConstantCompatibility,
}
impl TypeInfo {
@@ -229,6 +238,7 @@ impl TypeInfo {
flags: TypeFlags::empty(),
uniform_layout: Ok(Alignment::ONE),
storage_layout: Ok(Alignment::ONE),
push_constant_compatibility: Ok(()),
}
}
@@ -237,6 +247,7 @@ impl TypeInfo {
flags,
uniform_layout: Ok(alignment),
storage_layout: Ok(alignment),
push_constant_compatibility: Ok(()),
}
}
}
@@ -250,11 +261,15 @@ impl super::Validator {
}
}
pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> {
pub(super) const fn check_width(
&self,
scalar: crate::Scalar,
) -> Result<PushConstantCompatibility, WidthError> {
let mut push_constant_compatibility = Ok(());
let good = match scalar.kind {
crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH,
crate::ScalarKind::Float => {
if scalar.width == 8 {
crate::ScalarKind::Float => match scalar.width {
8 => {
if !self.capabilities.contains(Capabilities::FLOAT64) {
return Err(WidthError::MissingCapability {
name: "f64",
@@ -262,10 +277,21 @@ impl super::Validator {
});
}
true
} else {
scalar.width == 4
}
}
2 => {
if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) {
return Err(WidthError::MissingCapability {
name: "f16",
flag: "FLOAT16",
});
}
push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar));
true
}
_ => scalar.width == 4,
},
crate::ScalarKind::Sint => {
if scalar.width == 8 {
if !self.capabilities.contains(Capabilities::SHADER_INT64) {
@@ -297,7 +323,7 @@ impl super::Validator {
}
};
if good {
Ok(())
Ok(push_constant_compatibility)
} else {
Err(WidthError::Invalid(scalar.kind, scalar.width))
}
@@ -317,13 +343,13 @@ impl super::Validator {
use crate::TypeInner as Ti;
Ok(match gctx.types[handle].inner {
Ti::Scalar(scalar) => {
self.check_width(scalar)?;
let push_constant_compatibility = self.check_width(scalar)?;
let shareable = if scalar.kind.is_numeric() {
TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
} else {
TypeFlags::empty()
};
TypeInfo::new(
let mut type_info = TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::COPY
@@ -332,16 +358,18 @@ impl super::Validator {
| TypeFlags::CREATION_RESOLVED
| shareable,
Alignment::from_width(scalar.width),
)
);
type_info.push_constant_compatibility = push_constant_compatibility;
type_info
}
Ti::Vector { size, scalar } => {
self.check_width(scalar)?;
let push_constant_compatibility = self.check_width(scalar)?;
let shareable = if scalar.kind.is_numeric() {
TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
} else {
TypeFlags::empty()
};
TypeInfo::new(
let mut type_info = TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::COPY
@@ -350,7 +378,9 @@ impl super::Validator {
| TypeFlags::CREATION_RESOLVED
| shareable,
Alignment::from(size) * Alignment::from_width(scalar.width),
)
);
type_info.push_constant_compatibility = push_constant_compatibility;
type_info
}
Ti::Matrix {
columns: _,
@@ -360,8 +390,8 @@ impl super::Validator {
if scalar.kind != crate::ScalarKind::Float {
return Err(TypeError::MatrixElementNotFloat);
}
self.check_width(scalar)?;
TypeInfo::new(
let push_constant_compatibility = self.check_width(scalar)?;
let mut type_info = TypeInfo::new(
TypeFlags::DATA
| TypeFlags::SIZED
| TypeFlags::COPY
@@ -370,7 +400,9 @@ impl super::Validator {
| TypeFlags::CONSTRUCTIBLE
| TypeFlags::CREATION_RESOLVED,
Alignment::from(rows) * Alignment::from_width(scalar.width),
)
);
type_info.push_constant_compatibility = push_constant_compatibility;
type_info
}
Ti::Atomic(scalar) => {
match scalar {
@@ -465,7 +497,7 @@ impl super::Validator {
// However, some cases are trivial: All our implicit base types
// are DATA and SIZED, so we can never return
// `InvalidPointerBase` or `InvalidPointerToUnsized`.
self.check_width(scalar)?;
let _ = self.check_width(scalar)?;
// `Validator::validate_function` actually checks the address
// space of pointer arguments explicitly before checking the
@@ -551,6 +583,7 @@ impl super::Validator {
flags: base_info.flags & type_info_mask,
uniform_layout,
storage_layout,
push_constant_compatibility: base_info.push_constant_compatibility.clone(),
}
}
Ti::Struct { ref members, span } => {
@@ -631,6 +664,10 @@ impl super::Validator {
base_info.storage_layout,
handle,
);
if base_info.push_constant_compatibility.is_err() {
ti.push_constant_compatibility =
base_info.push_constant_compatibility.clone();
}
// Validate rule: If a structure member itself has a structure type S,
// then the number of bytes between the start of that member and

View File

@@ -0,0 +1,57 @@
#version 460
#extension GL_AMD_gpu_shader_half_float: enable
layout(set = 0, binding = 0) uniform A {
float16_t a_1;
f16vec2 a_vec2;
f16vec3 a_vec3;
f16vec4 a_vec4;
// So the rules here are particularly nasty for any f16 matrices in uniform buffers
// as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
// buffer is over-aligned to what naga-ir wants.
//
// This is https://github.com/gfx-rs/wgpu/issues/4375.
// f16mat2 a_mat2;
// f16mat2x3 a_mat2x3;
// f16mat2x4 a_mat2x4;
// f16mat3x2 a_mat3x2;
// f16mat3 a_mat3;
// f16mat3x4 a_mat3x4;
// f16mat4x2 a_mat4x2;
// f16mat4x3 a_mat4x3;
// f16mat4 a_mat4;
};
layout(set = 0, binding = 1) buffer B {
float16_t b_1;
f16vec2 b_vec2;
f16vec3 b_vec3;
f16vec4 b_vec4;
f16mat2 b_mat2;
f16mat2x3 b_mat2x3;
f16mat2x4 b_mat2x4;
f16mat3x2 b_mat3x2;
f16mat3 b_mat3;
f16mat3x4 b_mat3x4;
f16mat4x2 b_mat4x2;
f16mat4x3 b_mat4x3;
f16mat4 b_mat4;
};
// Copies every enabled f16 member of uniform block A into the matching
// member of storage buffer B, exercising f16 scalar and vector loads/stores.
// NOTE(review): this is a snapshot-test fixture — keep statements unchanged.
void main() {
    b_1 = a_1;
    b_vec2 = a_vec2;
    b_vec3 = a_vec3;
    b_vec4 = a_vec4;
    // Matrix copies stay disabled; see the stride note on block A above
    // (https://github.com/gfx-rs/wgpu/issues/4375).
    // b_mat2 = a_mat2;
    // b_mat2x3 = a_mat2x3;
    // b_mat2x4 = a_mat2x4;
    // b_mat3x2 = a_mat3x2;
    // b_mat3 = a_mat3;
    // b_mat3x4 = a_mat3x4;
    // b_mat4x2 = a_mat4x2;
    // b_mat4x3 = a_mat4x3;
    // b_mat4 = a_mat4;
}

View File

@@ -0,0 +1 @@
god_mode = true

View File

@@ -0,0 +1,57 @@
#version 460
#extension GL_AMD_gpu_shader_half_float: enable
layout(set = 0, binding = 0) uniform A {
float16_t a_1;
f16vec2 a_vec2;
f16vec3 a_vec3;
f16vec4 a_vec4;
// So the rules here are particularly nasty for any f16 matrices in uniform buffers
// as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
// buffer is over-aligned to what naga-ir wants.
//
// This is https://github.com/gfx-rs/wgpu/issues/4375.
// f16mat2 a_mat2;
// f16mat2x3 a_mat2x3;
// f16mat2x4 a_mat2x4;
// f16mat3x2 a_mat3x2;
// f16mat3 a_mat3;
// f16mat3x4 a_mat3x4;
// f16mat4x2 a_mat4x2;
// f16mat4x3 a_mat4x3;
// f16mat4 a_mat4;
};
layout(set = 0, binding = 1) buffer B {
float16_t b_1;
f16vec2 b_vec2;
f16vec3 b_vec3;
f16vec4 b_vec4;
f16mat2 b_mat2;
f16mat2x3 b_mat2x3;
f16mat2x4 b_mat2x4;
f16mat3x2 b_mat3x2;
f16mat3 b_mat3;
f16mat3x4 b_mat3x4;
f16mat4x2 b_mat4x2;
f16mat4x3 b_mat4x3;
f16mat4 b_mat4;
};
// Copies every enabled f16 member of uniform block A into the matching
// member of storage buffer B, exercising f16 scalar and vector loads/stores.
// NOTE(review): this is a snapshot-test fixture — keep statements unchanged.
void main() {
    b_1 = a_1;
    b_vec2 = a_vec2;
    b_vec3 = a_vec3;
    b_vec4 = a_vec4;
    // Matrix copies stay disabled; see the stride note on block A above
    // (https://github.com/gfx-rs/wgpu/issues/4375).
    // b_mat2 = a_mat2;
    // b_mat2x3 = a_mat2x3;
    // b_mat2x4 = a_mat2x4;
    // b_mat3x2 = a_mat3x2;
    // b_mat3 = a_mat3;
    // b_mat3x4 = a_mat3x4;
    // b_mat4x2 = a_mat4x2;
    // b_mat4x3 = a_mat4x3;
    // b_mat4 = a_mat4;
}

View File

@@ -0,0 +1,130 @@
; SPIR-V
; Version: 1.0
; Generator: Google Shaderc over Glslang; 11
; Bound: 46
; Schema: 0
OpCapability Shader
OpCapability StorageBuffer16BitAccess
OpCapability UniformAndStorageBuffer16BitAccess
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 460
OpSourceExtension "GL_AMD_gpu_shader_half_float"
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpName %main "main"
OpName %B "B"
OpMemberName %B 0 "b_1"
OpMemberName %B 1 "b_vec2"
OpMemberName %B 2 "b_vec3"
OpMemberName %B 3 "b_vec4"
OpMemberName %B 4 "b_mat2"
OpMemberName %B 5 "b_mat2x3"
OpMemberName %B 6 "b_mat2x4"
OpMemberName %B 7 "b_mat3x2"
OpMemberName %B 8 "b_mat3"
OpMemberName %B 9 "b_mat3x4"
OpMemberName %B 10 "b_mat4x2"
OpMemberName %B 11 "b_mat4x3"
OpMemberName %B 12 "b_mat4"
OpName %_ ""
OpName %A "A"
OpMemberName %A 0 "a_1"
OpMemberName %A 1 "a_vec2"
OpMemberName %A 2 "a_vec3"
OpMemberName %A 3 "a_vec4"
OpName %__0 ""
OpDecorate %B BufferBlock
OpMemberDecorate %B 0 Offset 0
OpMemberDecorate %B 1 Offset 4
OpMemberDecorate %B 2 Offset 8
OpMemberDecorate %B 3 Offset 16
OpMemberDecorate %B 4 ColMajor
OpMemberDecorate %B 4 MatrixStride 4
OpMemberDecorate %B 4 Offset 24
OpMemberDecorate %B 5 ColMajor
OpMemberDecorate %B 5 MatrixStride 8
OpMemberDecorate %B 5 Offset 32
OpMemberDecorate %B 6 ColMajor
OpMemberDecorate %B 6 MatrixStride 8
OpMemberDecorate %B 6 Offset 48
OpMemberDecorate %B 7 ColMajor
OpMemberDecorate %B 7 MatrixStride 4
OpMemberDecorate %B 7 Offset 64
OpMemberDecorate %B 8 ColMajor
OpMemberDecorate %B 8 MatrixStride 8
OpMemberDecorate %B 8 Offset 80
OpMemberDecorate %B 9 ColMajor
OpMemberDecorate %B 9 MatrixStride 8
OpMemberDecorate %B 9 Offset 104
OpMemberDecorate %B 10 ColMajor
OpMemberDecorate %B 10 MatrixStride 4
OpMemberDecorate %B 10 Offset 128
OpMemberDecorate %B 11 ColMajor
OpMemberDecorate %B 11 MatrixStride 8
OpMemberDecorate %B 11 Offset 144
OpMemberDecorate %B 12 ColMajor
OpMemberDecorate %B 12 MatrixStride 8
OpMemberDecorate %B 12 Offset 176
OpDecorate %_ Binding 1
OpDecorate %_ DescriptorSet 0
OpDecorate %A Block
OpMemberDecorate %A 0 Offset 0
OpMemberDecorate %A 1 Offset 4
OpMemberDecorate %A 2 Offset 8
OpMemberDecorate %A 3 Offset 16
OpDecorate %__0 Binding 0
OpDecorate %__0 DescriptorSet 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%half = OpTypeFloat 16
%v2half = OpTypeVector %half 2
%v3half = OpTypeVector %half 3
%v4half = OpTypeVector %half 4
%mat2v2half = OpTypeMatrix %v2half 2
%mat2v3half = OpTypeMatrix %v3half 2
%mat2v4half = OpTypeMatrix %v4half 2
%mat3v2half = OpTypeMatrix %v2half 3
%mat3v3half = OpTypeMatrix %v3half 3
%mat3v4half = OpTypeMatrix %v4half 3
%mat4v2half = OpTypeMatrix %v2half 4
%mat4v3half = OpTypeMatrix %v3half 4
%mat4v4half = OpTypeMatrix %v4half 4
%B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half
%_ptr_Uniform_B = OpTypePointer Uniform %B
%_ = OpVariable %_ptr_Uniform_B Uniform
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%A = OpTypeStruct %half %v2half %v3half %v4half
%_ptr_Uniform_A = OpTypePointer Uniform %A
%__0 = OpVariable %_ptr_Uniform_A Uniform
%_ptr_Uniform_half = OpTypePointer Uniform %half
%int_1 = OpConstant %int 1
%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
%int_2 = OpConstant %int 2
%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
%int_3 = OpConstant %int 3
%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
%main = OpFunction %void None %3
%5 = OpLabel
%28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0
%29 = OpLoad %half %28
%30 = OpAccessChain %_ptr_Uniform_half %_ %int_0
OpStore %30 %29
%33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1
%34 = OpLoad %v2half %33
%35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1
OpStore %35 %34
%38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2
%39 = OpLoad %v3half %38
%40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2
OpStore %40 %39
%43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3
%44 = OpLoad %v4half %43
%45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3
OpStore %45 %44
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1 @@
god_mode = true

View File

@@ -1,6 +1,6 @@
struct PushConstants {
index: u32,
double: vec2<f64>,
double: vec2<f32>,
}
var<push_constant> pc: PushConstants;

View File

@@ -0,0 +1,15 @@
# No GLSL support for f16
targets = "SPIRV | METAL | HLSL | WGSL"
god_mode = true
[spv]
version = [1, 0]
[hlsl]
shader_model = "V6_2"
special_constants_binding = { space = 1, register = 0 }
push_constants_target = { space = 0, register = 0 }
lang_version = [1, 0]
per_entry_point_map = {}
zero_initialize_workgroup_memory = true

127
naga/tests/in/wgsl/f16.wgsl Normal file
View File

@@ -0,0 +1,127 @@
enable f16;
var<private> private_variable: f16 = 1h;
const constant_variable: f16 = f16(15.2);
struct UniformCompatible {
// Other types
val_u32: u32,
val_i32: i32,
val_f32: f32,
// f16
val_f16: f16,
val_f16_2: vec2<f16>,
val_f16_3: vec3<f16>,
val_f16_4: vec4<f16>,
final_value: f16,
val_mat2x2: mat2x2<f16>,
val_mat2x3: mat2x3<f16>,
val_mat2x4: mat2x4<f16>,
val_mat3x2: mat3x2<f16>,
val_mat3x3: mat3x3<f16>,
val_mat3x4: mat3x4<f16>,
val_mat4x2: mat4x2<f16>,
val_mat4x3: mat4x3<f16>,
val_mat4x4: mat4x4<f16>,
}
struct StorageCompatible {
val_f16_array_2: array<f16, 2>,
}
struct LayoutTest {
scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32
}
@group(0) @binding(0)
var<uniform> input_uniform: UniformCompatible;
@group(0) @binding(1)
var<storage> input_storage: UniformCompatible;
@group(0) @binding(2)
var<storage> input_arrays: StorageCompatible;
@group(0) @binding(3)
var<storage, read_write> output: UniformCompatible;
@group(0) @binding(4)
var<storage, read_write> output_arrays: StorageCompatible;
fn f16_function(x: f16) -> f16 {
var val: f16 = f16(constant_variable);
// A number too big for f16
val += 1h - 33333h;
    // Constructing an f16 from an AbstractFloat
val += val + f16(5.);
// Constructing a f16 from other types and other types from f16.
val += f16(input_uniform.val_f32 + f32(val));
    // Constructing a vec3<f16> from an f16
val += vec3<f16>(input_uniform.val_f16).z;
// Reading/writing to a uniform/storage buffer
output.val_f16 = input_uniform.val_f16 + input_storage.val_f16;
output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2;
output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3;
output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4;
output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2;
output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3;
output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4;
output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2;
output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3;
output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4;
output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2;
output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3;
output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4;
output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2;
// We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
// constants based on the size of the type, and which are arguments.
// Numeric functions
val += abs(val);
val += clamp(val, val, val);
val += dot(vec2(val), vec2(val));
val += max(val, val);
val += min(val, val);
val += sign(val);
val += f16(1.0);
// We use the shorthand aliases here to ensure the aliases
// work correctly.
// Cast vectors to/from f32
let float_vec2 = vec2f(input_uniform.val_f16_2);
output.val_f16_2 = vec2h(float_vec2);
let float_vec3 = vec3f(input_uniform.val_f16_3);
output.val_f16_3 = vec3h(float_vec3);
let float_vec4 = vec4f(input_uniform.val_f16_4);
output.val_f16_4 = vec4h(float_vec4);
// Cast matrices to/from f32
output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2));
output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3));
output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4));
output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2));
output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3));
output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4));
output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2));
output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3));
output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4));
// Make sure all the variables are used.
return val;
}
@compute @workgroup_size(1)
fn main() {
output.final_value = f16_function(2h);
}

View File

@@ -0,0 +1,351 @@
struct NagaConstants {
int first_vertex;
int first_instance;
uint other;
};
ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
struct UniformCompatible {
uint val_u32_;
int val_i32_;
float val_f32_;
half val_f16_;
half2 val_f16_2_;
int _pad5_0;
half3 val_f16_3_;
half4 val_f16_4_;
half final_value;
half2 val_mat2x2__0; half2 val_mat2x2__1;
int _pad9_0;
row_major half2x3 val_mat2x3_;
row_major half2x4 val_mat2x4_;
half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2;
int _pad12_0;
row_major half3x3 val_mat3x3_;
row_major half3x4 val_mat3x4_;
half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3;
row_major half4x3 val_mat4x3_;
row_major half4x4 val_mat4x4_;
};
struct StorageCompatible {
half val_f16_array_2_[2];
};
struct LayoutTest {
half scalar1_;
half scalar2_;
int _pad2_0;
half3 v3_;
half tuck_in;
half scalar4_;
uint larger;
};
static const half constant_variable = 15.203125h;
static half private_variable = 1.0h;
cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
ByteAddressBuffer input_storage : register(t1);
ByteAddressBuffer input_arrays : register(t2);
RWByteAddressBuffer output : register(u3);
RWByteAddressBuffer output_arrays : register(u4);
half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) {
return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1);
}
void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) {
obj.val_mat2x2__0 = mat[0];
obj.val_mat2x2__1 = mat[1];
}
void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
switch(mat_idx) {
case 0: { obj.val_mat2x2__0 = vec; break; }
case 1: { obj.val_mat2x2__1 = vec; break; }
}
}
void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
switch(mat_idx) {
case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; }
case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; }
}
}
half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) {
return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2);
}
void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) {
obj.val_mat3x2__0 = mat[0];
obj.val_mat3x2__1 = mat[1];
obj.val_mat3x2__2 = mat[2];
}
void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
switch(mat_idx) {
case 0: { obj.val_mat3x2__0 = vec; break; }
case 1: { obj.val_mat3x2__1 = vec; break; }
case 2: { obj.val_mat3x2__2 = vec; break; }
}
}
void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
switch(mat_idx) {
case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; }
case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; }
case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; }
}
}
half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) {
return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3);
}
void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) {
obj.val_mat4x2__0 = mat[0];
obj.val_mat4x2__1 = mat[1];
obj.val_mat4x2__2 = mat[2];
obj.val_mat4x2__3 = mat[3];
}
void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
switch(mat_idx) {
case 0: { obj.val_mat4x2__0 = vec; break; }
case 1: { obj.val_mat4x2__1 = vec; break; }
case 2: { obj.val_mat4x2__2 = vec; break; }
case 3: { obj.val_mat4x2__3 = vec; break; }
}
}
void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
switch(mat_idx) {
case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; }
case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; }
case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; }
case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; }
}
}
typedef half ret_Constructarray2_half_[2];
ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) {
half ret[2] = { arg0, arg1 };
return ret;
}
half f16_function(half x)
{
half val = 15.203125h;
half _e4 = val;
val = (_e4 + -33344.0h);
half _e6 = val;
half _e9 = val;
val = (_e9 + (_e6 + 5.0h));
float _e13 = input_uniform.val_f32_;
half _e14 = val;
half _e18 = val;
val = (_e18 + half((_e13 + float(_e14))));
half _e22 = input_uniform.val_f16_;
half _e25 = val;
val = (_e25 + (_e22).xxx.z);
half _e31 = input_uniform.val_f16_;
half _e34 = input_storage.Load<half>(12);
output.Store(12, (_e31 + _e34));
half2 _e40 = input_uniform.val_f16_2_;
half2 _e43 = input_storage.Load<half2>(16);
output.Store(16, (_e40 + _e43));
half3 _e49 = input_uniform.val_f16_3_;
half3 _e52 = input_storage.Load<half3>(24);
output.Store(24, (_e49 + _e52));
half4 _e58 = input_uniform.val_f16_4_;
half4 _e61 = input_storage.Load<half4>(32);
output.Store(32, (_e58 + _e61));
half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
half2x2 _e70 = half2x2(input_storage.Load<half2>(44+0), input_storage.Load<half2>(44+4));
{
half2x2 _value2 = (_e67 + _e70);
output.Store(44+0, _value2[0]);
output.Store(44+4, _value2[1]);
}
half2x3 _e76 = input_uniform.val_mat2x3_;
half2x3 _e79 = half2x3(input_storage.Load<half3>(56+0), input_storage.Load<half3>(56+8));
{
half2x3 _value2 = (_e76 + _e79);
output.Store(56+0, _value2[0]);
output.Store(56+8, _value2[1]);
}
half2x4 _e85 = input_uniform.val_mat2x4_;
half2x4 _e88 = half2x4(input_storage.Load<half4>(72+0), input_storage.Load<half4>(72+8));
{
half2x4 _value2 = (_e85 + _e88);
output.Store(72+0, _value2[0]);
output.Store(72+8, _value2[1]);
}
half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
half3x2 _e97 = half3x2(input_storage.Load<half2>(88+0), input_storage.Load<half2>(88+4), input_storage.Load<half2>(88+8));
{
half3x2 _value2 = (_e94 + _e97);
output.Store(88+0, _value2[0]);
output.Store(88+4, _value2[1]);
output.Store(88+8, _value2[2]);
}
half3x3 _e103 = input_uniform.val_mat3x3_;
half3x3 _e106 = half3x3(input_storage.Load<half3>(104+0), input_storage.Load<half3>(104+8), input_storage.Load<half3>(104+16));
{
half3x3 _value2 = (_e103 + _e106);
output.Store(104+0, _value2[0]);
output.Store(104+8, _value2[1]);
output.Store(104+16, _value2[2]);
}
half3x4 _e112 = input_uniform.val_mat3x4_;
half3x4 _e115 = half3x4(input_storage.Load<half4>(128+0), input_storage.Load<half4>(128+8), input_storage.Load<half4>(128+16));
{
half3x4 _value2 = (_e112 + _e115);
output.Store(128+0, _value2[0]);
output.Store(128+8, _value2[1]);
output.Store(128+16, _value2[2]);
}
half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
half4x2 _e124 = half4x2(input_storage.Load<half2>(152+0), input_storage.Load<half2>(152+4), input_storage.Load<half2>(152+8), input_storage.Load<half2>(152+12));
{
half4x2 _value2 = (_e121 + _e124);
output.Store(152+0, _value2[0]);
output.Store(152+4, _value2[1]);
output.Store(152+8, _value2[2]);
output.Store(152+12, _value2[3]);
}
half4x3 _e130 = input_uniform.val_mat4x3_;
half4x3 _e133 = half4x3(input_storage.Load<half3>(168+0), input_storage.Load<half3>(168+8), input_storage.Load<half3>(168+16), input_storage.Load<half3>(168+24));
{
half4x3 _value2 = (_e130 + _e133);
output.Store(168+0, _value2[0]);
output.Store(168+8, _value2[1]);
output.Store(168+16, _value2[2]);
output.Store(168+24, _value2[3]);
}
half4x4 _e139 = input_uniform.val_mat4x4_;
half4x4 _e142 = half4x4(input_storage.Load<half4>(200+0), input_storage.Load<half4>(200+8), input_storage.Load<half4>(200+16), input_storage.Load<half4>(200+24));
{
half4x4 _value2 = (_e139 + _e142);
output.Store(200+0, _value2[0]);
output.Store(200+8, _value2[1]);
output.Store(200+16, _value2[2]);
output.Store(200+24, _value2[3]);
}
half _e148[2] = Constructarray2_half_(input_arrays.Load<half>(0+0), input_arrays.Load<half>(0+2));
{
half _value2[2] = _e148;
output_arrays.Store(0+0, _value2[0]);
output_arrays.Store(0+2, _value2[1]);
}
half _e149 = val;
half _e151 = val;
val = (_e151 + abs(_e149));
half _e153 = val;
half _e154 = val;
half _e155 = val;
half _e157 = val;
val = (_e157 + clamp(_e153, _e154, _e155));
half _e159 = val;
half _e161 = val;
half _e164 = val;
val = (_e164 + dot((_e159).xx, (_e161).xx));
half _e166 = val;
half _e167 = val;
half _e169 = val;
val = (_e169 + max(_e166, _e167));
half _e171 = val;
half _e172 = val;
half _e174 = val;
val = (_e174 + min(_e171, _e172));
half _e176 = val;
half _e178 = val;
val = (_e178 + sign(_e176));
half _e181 = val;
val = (_e181 + 1.0h);
half2 _e185 = input_uniform.val_f16_2_;
float2 float_vec2_ = float2(_e185);
output.Store(16, half2(float_vec2_));
half3 _e192 = input_uniform.val_f16_3_;
float3 float_vec3_ = float3(_e192);
output.Store(24, half3(float_vec3_));
half4 _e199 = input_uniform.val_f16_4_;
float4 float_vec4_ = float4(_e199);
output.Store(32, half4(float_vec4_));
half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
{
half2x2 _value2 = half2x2(float2x2(_e208));
output.Store(44+0, _value2[0]);
output.Store(44+4, _value2[1]);
}
half2x3 _e215 = input_uniform.val_mat2x3_;
{
half2x3 _value2 = half2x3(float2x3(_e215));
output.Store(56+0, _value2[0]);
output.Store(56+8, _value2[1]);
}
half2x4 _e222 = input_uniform.val_mat2x4_;
{
half2x4 _value2 = half2x4(float2x4(_e222));
output.Store(72+0, _value2[0]);
output.Store(72+8, _value2[1]);
}
half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
{
half3x2 _value2 = half3x2(float3x2(_e229));
output.Store(88+0, _value2[0]);
output.Store(88+4, _value2[1]);
output.Store(88+8, _value2[2]);
}
half3x3 _e236 = input_uniform.val_mat3x3_;
{
half3x3 _value2 = half3x3(float3x3(_e236));
output.Store(104+0, _value2[0]);
output.Store(104+8, _value2[1]);
output.Store(104+16, _value2[2]);
}
half3x4 _e243 = input_uniform.val_mat3x4_;
{
half3x4 _value2 = half3x4(float3x4(_e243));
output.Store(128+0, _value2[0]);
output.Store(128+8, _value2[1]);
output.Store(128+16, _value2[2]);
}
half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
{
half4x2 _value2 = half4x2(float4x2(_e250));
output.Store(152+0, _value2[0]);
output.Store(152+4, _value2[1]);
output.Store(152+8, _value2[2]);
output.Store(152+12, _value2[3]);
}
half4x3 _e257 = input_uniform.val_mat4x3_;
{
half4x3 _value2 = half4x3(float4x3(_e257));
output.Store(168+0, _value2[0]);
output.Store(168+8, _value2[1]);
output.Store(168+16, _value2[2]);
output.Store(168+24, _value2[3]);
}
half4x4 _e264 = input_uniform.val_mat4x4_;
{
half4x4 _value2 = half4x4(float4x4(_e264));
output.Store(200+0, _value2[0]);
output.Store(200+8, _value2[1]);
output.Store(200+16, _value2[2]);
output.Store(200+24, _value2[3]);
}
half _e267 = val;
return _e267;
}
[numthreads(1, 1, 1)]
void main()
{
const half _e3 = f16_function(2.0h);
output.Store(40, _e3);
return;
}

View File

@@ -0,0 +1,12 @@
(
vertex:[
],
fragment:[
],
compute:[
(
entry_point:"main",
target_profile:"cs_6_2",
),
],
)

View File

@@ -6,7 +6,7 @@ using metal::uint;
struct PushConstants {
uint index;
char _pad1[12];
char _pad1[4];
metal::float2 double_;
};
struct FragmentIn {

177
naga/tests/out/msl/f16.msl Normal file
View File

@@ -0,0 +1,177 @@
// language: metal1.0
#include <metal_stdlib>
#include <simd/simd.h>
using metal::uint;
struct UniformCompatible {
uint val_u32_;
int val_i32_;
float val_f32_;
half val_f16_;
char _pad4[2];
metal::half2 val_f16_2_;
char _pad5[4];
metal::half3 val_f16_3_;
metal::half4 val_f16_4_;
half final_value;
char _pad8[2];
metal::half2x2 val_mat2x2_;
char _pad9[4];
metal::half2x3 val_mat2x3_;
metal::half2x4 val_mat2x4_;
metal::half3x2 val_mat3x2_;
char _pad12[4];
metal::half3x3 val_mat3x3_;
metal::half3x4 val_mat3x4_;
metal::half4x2 val_mat4x2_;
metal::half4x3 val_mat4x3_;
metal::half4x4 val_mat4x4_;
};
struct type_16 {
half inner[2];
};
struct StorageCompatible {
type_16 val_f16_array_2_;
};
struct LayoutTest {
half scalar1_;
half scalar2_;
char _pad2[4];
metal::packed_half3 v3_;
half tuck_in;
half scalar4_;
char _pad5[2];
uint larger;
};
constant half constant_variable = 15.203125h;
half f16_function(
half x,
constant UniformCompatible& input_uniform,
device UniformCompatible const& input_storage,
device StorageCompatible const& input_arrays,
device UniformCompatible& output,
device StorageCompatible& output_arrays
) {
half val = 15.203125h;
half _e4 = val;
val = _e4 + -33344.0h;
half _e6 = val;
half _e9 = val;
val = _e9 + (_e6 + 5.0h);
float _e13 = input_uniform.val_f32_;
half _e14 = val;
half _e18 = val;
val = _e18 + static_cast<half>(_e13 + static_cast<float>(_e14));
half _e22 = input_uniform.val_f16_;
half _e25 = val;
val = _e25 + metal::half3(_e22).z;
half _e31 = input_uniform.val_f16_;
half _e34 = input_storage.val_f16_;
output.val_f16_ = _e31 + _e34;
metal::half2 _e40 = input_uniform.val_f16_2_;
metal::half2 _e43 = input_storage.val_f16_2_;
output.val_f16_2_ = _e40 + _e43;
metal::half3 _e49 = input_uniform.val_f16_3_;
metal::half3 _e52 = input_storage.val_f16_3_;
output.val_f16_3_ = _e49 + _e52;
metal::half4 _e58 = input_uniform.val_f16_4_;
metal::half4 _e61 = input_storage.val_f16_4_;
output.val_f16_4_ = _e58 + _e61;
metal::half2x2 _e67 = input_uniform.val_mat2x2_;
metal::half2x2 _e70 = input_storage.val_mat2x2_;
output.val_mat2x2_ = _e67 + _e70;
metal::half2x3 _e76 = input_uniform.val_mat2x3_;
metal::half2x3 _e79 = input_storage.val_mat2x3_;
output.val_mat2x3_ = _e76 + _e79;
metal::half2x4 _e85 = input_uniform.val_mat2x4_;
metal::half2x4 _e88 = input_storage.val_mat2x4_;
output.val_mat2x4_ = _e85 + _e88;
metal::half3x2 _e94 = input_uniform.val_mat3x2_;
metal::half3x2 _e97 = input_storage.val_mat3x2_;
output.val_mat3x2_ = _e94 + _e97;
metal::half3x3 _e103 = input_uniform.val_mat3x3_;
metal::half3x3 _e106 = input_storage.val_mat3x3_;
output.val_mat3x3_ = _e103 + _e106;
metal::half3x4 _e112 = input_uniform.val_mat3x4_;
metal::half3x4 _e115 = input_storage.val_mat3x4_;
output.val_mat3x4_ = _e112 + _e115;
metal::half4x2 _e121 = input_uniform.val_mat4x2_;
metal::half4x2 _e124 = input_storage.val_mat4x2_;
output.val_mat4x2_ = _e121 + _e124;
metal::half4x3 _e130 = input_uniform.val_mat4x3_;
metal::half4x3 _e133 = input_storage.val_mat4x3_;
output.val_mat4x3_ = _e130 + _e133;
metal::half4x4 _e139 = input_uniform.val_mat4x4_;
metal::half4x4 _e142 = input_storage.val_mat4x4_;
output.val_mat4x4_ = _e139 + _e142;
type_16 _e148 = input_arrays.val_f16_array_2_;
output_arrays.val_f16_array_2_ = _e148;
half _e149 = val;
half _e151 = val;
val = _e151 + metal::abs(_e149);
half _e153 = val;
half _e154 = val;
half _e155 = val;
half _e157 = val;
val = _e157 + metal::clamp(_e153, _e154, _e155);
half _e159 = val;
half _e161 = val;
half _e164 = val;
val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161));
half _e166 = val;
half _e167 = val;
half _e169 = val;
val = _e169 + metal::max(_e166, _e167);
half _e171 = val;
half _e172 = val;
half _e174 = val;
val = _e174 + metal::min(_e171, _e172);
half _e176 = val;
half _e178 = val;
val = _e178 + metal::sign(_e176);
half _e181 = val;
val = _e181 + 1.0h;
metal::half2 _e185 = input_uniform.val_f16_2_;
metal::float2 float_vec2_ = static_cast<metal::float2>(_e185);
output.val_f16_2_ = static_cast<metal::half2>(float_vec2_);
metal::half3 _e192 = input_uniform.val_f16_3_;
metal::float3 float_vec3_ = static_cast<metal::float3>(_e192);
output.val_f16_3_ = static_cast<metal::half3>(float_vec3_);
metal::half4 _e199 = input_uniform.val_f16_4_;
metal::float4 float_vec4_ = static_cast<metal::float4>(_e199);
output.val_f16_4_ = static_cast<metal::half4>(float_vec4_);
metal::half2x2 _e208 = input_uniform.val_mat2x2_;
output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208));
metal::half2x3 _e215 = input_uniform.val_mat2x3_;
output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215));
metal::half2x4 _e222 = input_uniform.val_mat2x4_;
output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222));
metal::half3x2 _e229 = input_uniform.val_mat3x2_;
output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229));
metal::half3x3 _e236 = input_uniform.val_mat3x3_;
output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236));
metal::half3x4 _e243 = input_uniform.val_mat3x4_;
output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243));
metal::half4x2 _e250 = input_uniform.val_mat4x2_;
output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250));
metal::half4x3 _e257 = input_uniform.val_mat4x3_;
output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257));
metal::half4x4 _e264 = input_uniform.val_mat4x4_;
output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264));
half _e267 = val;
return _e267;
}
kernel void main_(
constant UniformCompatible& input_uniform [[user(fake0)]]
, device UniformCompatible const& input_storage [[user(fake0)]]
, device StorageCompatible const& input_arrays [[user(fake0)]]
, device UniformCompatible& output [[user(fake0)]]
, device StorageCompatible& output_arrays [[user(fake0)]]
) {
half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays);
output.final_value = _e3;
return;
}

View File

@@ -1,7 +1,7 @@
; SPIR-V
; Version: 1.0
; Generator: rspirv
; Bound: 175
; Bound: 173
OpCapability Shader
OpCapability Int64Atomics
OpCapability Int64
@@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64"
OpEntryPoint GLCompute %105 "test_atomic_compare_exchange_u64"
OpEntryPoint GLCompute %104 "test_atomic_compare_exchange_u64"
OpExecutionMode %19 LocalSize 1 1 1
OpExecutionMode %105 LocalSize 1 1 1
OpExecutionMode %104 LocalSize 1 1 1
OpDecorate %5 ArrayStride 8
OpDecorate %8 ArrayStride 8
OpMemberDecorate %10 0 Offset 0
@@ -63,12 +63,12 @@ OpMemberDecorate %16 0 Offset 0
%68 = OpTypeInt 32 1
%67 = OpConstant %68 1
%69 = OpConstant %3 64
%106 = OpTypePointer StorageBuffer %8
%108 = OpConstant %7 10
%111 = OpTypePointer Function %7
%112 = OpConstantNull %7
%114 = OpConstantNull %9
%138 = OpTypePointer StorageBuffer %7
%105 = OpTypePointer StorageBuffer %8
%107 = OpConstant %7 10
%110 = OpTypePointer Function %7
%111 = OpConstantNull %7
%113 = OpConstantNull %9
%137 = OpTypePointer StorageBuffer %7
%19 = OpFunction %2 None %20
%18 = OpLabel
%27 = OpVariable %28 Function %22
@@ -142,16 +142,16 @@ OpBranch %89
%89 = OpLabel
%91 = OpLoad %4 %29
%92 = OpIAdd %4 %91 %25
%94 = OpLoad %3 %27
%95 = OpLoad %4 %29
%97 = OpAccessChain %64 %23 %94
%98 = OpAtomicCompareExchange %4 %97 %67 %69 %69 %92 %95
%99 = OpIEqual %9 %98 %95
%96 = OpCompositeConstruct %10 %98 %99
%100 = OpCompositeExtract %4 %96 0
OpStore %29 %100
%101 = OpCompositeExtract %9 %96 1
OpStore %32 %101
%93 = OpLoad %3 %27
%94 = OpLoad %4 %29
%96 = OpAccessChain %64 %23 %93
%97 = OpAtomicCompareExchange %4 %96 %67 %69 %69 %92 %94
%98 = OpIEqual %9 %97 %94
%95 = OpCompositeConstruct %10 %97 %98
%99 = OpCompositeExtract %4 %95 0
OpStore %29 %99
%100 = OpCompositeExtract %9 %95 1
OpStore %32 %100
OpBranch %90
%90 = OpLabel
OpBranch %73
@@ -162,110 +162,110 @@ OpBranch %62
%62 = OpLabel
OpBranch %39
%39 = OpLabel
%102 = OpLoad %3 %27
%103 = OpIAdd %3 %102 %26
OpStore %27 %103
%101 = OpLoad %3 %27
%102 = OpIAdd %3 %101 %26
OpStore %27 %102
OpBranch %36
%37 = OpLabel
OpReturn
OpFunctionEnd
%105 = OpFunction %2 None %20
%104 = OpLabel
%109 = OpVariable %28 Function %22
%110 = OpVariable %111 Function %112
%113 = OpVariable %33 Function %114
%120 = OpVariable %41 Function %43
%145 = OpVariable %41 Function %43
%107 = OpAccessChain %106 %15 %22
%104 = OpFunction %2 None %20
%103 = OpLabel
%108 = OpVariable %28 Function %22
%109 = OpVariable %110 Function %111
%112 = OpVariable %33 Function %113
%119 = OpVariable %41 Function %43
%144 = OpVariable %41 Function %43
%106 = OpAccessChain %105 %15 %22
OpBranch %114
%114 = OpLabel
OpBranch %115
%115 = OpLabel
OpBranch %116
%116 = OpLabel
OpLoopMerge %117 %119 None
OpBranch %121
%121 = OpLabel
%122 = OpLoad %40 %120
%123 = OpIEqual %42 %45 %122
%124 = OpAll %9 %123
OpSelectionMerge %125 None
OpBranchConditional %124 %117 %125
%125 = OpLabel
%126 = OpCompositeExtract %3 %122 1
%127 = OpIEqual %9 %126 %44
%128 = OpSelect %3 %127 %26 %22
%129 = OpCompositeConstruct %40 %128 %26
%130 = OpIAdd %40 %122 %129
OpStore %120 %130
OpBranch %118
%118 = OpLabel
%131 = OpLoad %3 %109
%132 = OpULessThan %9 %131 %6
OpSelectionMerge %133 None
OpBranchConditional %132 %133 %134
%134 = OpLabel
OpLoopMerge %116 %118 None
OpBranch %120
%120 = OpLabel
%121 = OpLoad %40 %119
%122 = OpIEqual %42 %45 %121
%123 = OpAll %9 %122
OpSelectionMerge %124 None
OpBranchConditional %123 %116 %124
%124 = OpLabel
%125 = OpCompositeExtract %3 %121 1
%126 = OpIEqual %9 %125 %44
%127 = OpSelect %3 %126 %26 %22
%128 = OpCompositeConstruct %40 %127 %26
%129 = OpIAdd %40 %121 %128
OpStore %119 %129
OpBranch %117
%117 = OpLabel
%130 = OpLoad %3 %108
%131 = OpULessThan %9 %130 %6
OpSelectionMerge %132 None
OpBranchConditional %131 %132 %133
%133 = OpLabel
OpBranch %135
%135 = OpLabel
%137 = OpLoad %3 %109
%139 = OpAccessChain %138 %107 %137
%140 = OpAtomicLoad %7 %139 %67 %69
OpStore %110 %140
OpStore %113 %24
OpBranch %141
%141 = OpLabel
OpLoopMerge %142 %144 None
OpBranch %146
%146 = OpLabel
%147 = OpLoad %40 %145
%148 = OpIEqual %42 %45 %147
%149 = OpAll %9 %148
OpSelectionMerge %150 None
OpBranchConditional %149 %142 %150
%150 = OpLabel
%151 = OpCompositeExtract %3 %147 1
%152 = OpIEqual %9 %151 %44
%153 = OpSelect %3 %152 %26 %22
%154 = OpCompositeConstruct %40 %153 %26
%155 = OpIAdd %40 %147 %154
OpStore %145 %155
OpBranch %143
%143 = OpLabel
%156 = OpLoad %9 %113
%157 = OpLogicalNot %9 %156
OpSelectionMerge %158 None
OpBranchConditional %157 %158 %159
%159 = OpLabel
OpBranch %116
%132 = OpLabel
OpBranch %134
%134 = OpLabel
%136 = OpLoad %3 %108
%138 = OpAccessChain %137 %106 %136
%139 = OpAtomicLoad %7 %138 %67 %69
OpStore %109 %139
OpStore %112 %24
OpBranch %140
%140 = OpLabel
OpLoopMerge %141 %143 None
OpBranch %145
%145 = OpLabel
%146 = OpLoad %40 %144
%147 = OpIEqual %42 %45 %146
%148 = OpAll %9 %147
OpSelectionMerge %149 None
OpBranchConditional %148 %141 %149
%149 = OpLabel
%150 = OpCompositeExtract %3 %146 1
%151 = OpIEqual %9 %150 %44
%152 = OpSelect %3 %151 %26 %22
%153 = OpCompositeConstruct %40 %152 %26
%154 = OpIAdd %40 %146 %153
OpStore %144 %154
OpBranch %142
%142 = OpLabel
%155 = OpLoad %9 %112
%156 = OpLogicalNot %9 %155
OpSelectionMerge %157 None
OpBranchConditional %156 %157 %158
%158 = OpLabel
OpBranch %141
%157 = OpLabel
OpBranch %159
%159 = OpLabel
%161 = OpLoad %7 %109
%162 = OpIAdd %7 %161 %107
%163 = OpLoad %3 %108
%164 = OpLoad %7 %109
%166 = OpAccessChain %137 %106 %163
%167 = OpAtomicCompareExchange %7 %166 %67 %69 %69 %162 %164
%168 = OpIEqual %9 %167 %164
%165 = OpCompositeConstruct %11 %167 %168
%169 = OpCompositeExtract %7 %165 0
OpStore %109 %169
%170 = OpCompositeExtract %9 %165 1
OpStore %112 %170
OpBranch %160
%160 = OpLabel
%162 = OpLoad %7 %110
%163 = OpIAdd %7 %162 %108
%165 = OpLoad %3 %109
%166 = OpLoad %7 %110
%168 = OpAccessChain %138 %107 %165
%169 = OpAtomicCompareExchange %7 %168 %67 %69 %69 %163 %166
%170 = OpIEqual %9 %169 %166
%167 = OpCompositeConstruct %11 %169 %170
%171 = OpCompositeExtract %7 %167 0
OpStore %110 %171
%172 = OpCompositeExtract %9 %167 1
OpStore %113 %172
OpBranch %161
%161 = OpLabel
OpBranch %144
%144 = OpLabel
OpBranch %141
%142 = OpLabel
OpBranch %136
%136 = OpLabel
OpBranch %119
%119 = OpLabel
%173 = OpLoad %3 %109
%174 = OpIAdd %3 %173 %26
OpStore %109 %174
OpBranch %116
%117 = OpLabel
OpBranch %143
%143 = OpLabel
OpBranch %140
%141 = OpLabel
OpBranch %135
%135 = OpLabel
OpBranch %118
%118 = OpLabel
%171 = OpLoad %3 %108
%172 = OpIAdd %3 %171 %26
OpStore %108 %172
OpBranch %115
%116 = OpLabel
OpReturn
OpFunctionEnd

View File

@@ -1,76 +1,74 @@
; SPIR-V
; Version: 1.2
; Generator: rspirv
; Bound: 48
; Bound: 47
OpCapability Shader
OpCapability Float64
OpCapability Geometry
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %23 "main" %15 %18 %21
OpExecutionMode %23 OriginUpperLeft
OpEntryPoint Fragment %22 "main" %14 %17 %20
OpExecutionMode %22 OriginUpperLeft
OpMemberDecorate %6 0 Offset 0
OpMemberDecorate %6 1 Offset 16
OpMemberDecorate %9 0 Offset 0
OpMemberDecorate %9 1 Offset 16
OpDecorate %11 Block
OpMemberDecorate %11 0 Offset 0
OpDecorate %15 Location 0
OpDecorate %18 BuiltIn PrimitiveId
OpDecorate %18 Flat
OpDecorate %21 Location 0
OpMemberDecorate %6 1 Offset 8
OpMemberDecorate %8 0 Offset 0
OpMemberDecorate %8 1 Offset 16
OpDecorate %10 Block
OpMemberDecorate %10 0 Offset 0
OpDecorate %14 Location 0
OpDecorate %17 BuiltIn PrimitiveId
OpDecorate %17 Flat
OpDecorate %20 Location 0
%2 = OpTypeVoid
%3 = OpTypeInt 32 0
%5 = OpTypeFloat 64
%5 = OpTypeFloat 32
%4 = OpTypeVector %5 2
%6 = OpTypeStruct %3 %4
%8 = OpTypeFloat 32
%7 = OpTypeVector %8 4
%9 = OpTypeStruct %7 %3
%11 = OpTypeStruct %6
%12 = OpTypePointer PushConstant %11
%10 = OpVariable %12 PushConstant
%16 = OpTypePointer Input %7
%15 = OpVariable %16 Input
%19 = OpTypePointer Input %3
%18 = OpVariable %19 Input
%22 = OpTypePointer Output %7
%21 = OpVariable %22 Output
%24 = OpTypeFunction %2
%25 = OpTypePointer PushConstant %6
%26 = OpConstant %3 0
%28 = OpConstant %8 1.0
%29 = OpTypeVector %8 3
%30 = OpConstantComposite %29 %28 %28 %28
%33 = OpTypePointer PushConstant %3
%36 = OpTypeBool
%23 = OpFunction %2 None %24
%13 = OpLabel
%17 = OpLoad %7 %15
%20 = OpLoad %3 %18
%14 = OpCompositeConstruct %9 %17 %20
%27 = OpAccessChain %25 %10 %26
OpBranch %31
%31 = OpLabel
%32 = OpCompositeExtract %3 %14 1
%34 = OpAccessChain %33 %27 %26
%35 = OpLoad %3 %34
%37 = OpIEqual %36 %32 %35
OpSelectionMerge %38 None
OpBranchConditional %37 %39 %40
%39 = OpLabel
%41 = OpCompositeExtract %7 %14 0
OpStore %21 %41
OpReturn
%40 = OpLabel
%42 = OpCompositeExtract %7 %14 0
%43 = OpVectorShuffle %29 %42 %42 0 1 2
%44 = OpFSub %29 %30 %43
%45 = OpCompositeExtract %7 %14 0
%46 = OpCompositeExtract %8 %45 3
%47 = OpCompositeConstruct %7 %44 %46
OpStore %21 %47
OpReturn
%7 = OpTypeVector %5 4
%8 = OpTypeStruct %7 %3
%10 = OpTypeStruct %6
%11 = OpTypePointer PushConstant %10
%9 = OpVariable %11 PushConstant
%15 = OpTypePointer Input %7
%14 = OpVariable %15 Input
%18 = OpTypePointer Input %3
%17 = OpVariable %18 Input
%21 = OpTypePointer Output %7
%20 = OpVariable %21 Output
%23 = OpTypeFunction %2
%24 = OpTypePointer PushConstant %6
%25 = OpConstant %3 0
%27 = OpConstant %5 1.0
%28 = OpTypeVector %5 3
%29 = OpConstantComposite %28 %27 %27 %27
%32 = OpTypePointer PushConstant %3
%35 = OpTypeBool
%22 = OpFunction %2 None %23
%12 = OpLabel
%16 = OpLoad %7 %14
%19 = OpLoad %3 %17
%13 = OpCompositeConstruct %8 %16 %19
%26 = OpAccessChain %24 %9 %25
OpBranch %30
%30 = OpLabel
%31 = OpCompositeExtract %3 %13 1
%33 = OpAccessChain %32 %26 %25
%34 = OpLoad %3 %33
%36 = OpIEqual %35 %31 %34
OpSelectionMerge %37 None
OpBranchConditional %36 %38 %39
%38 = OpLabel
%40 = OpCompositeExtract %7 %13 0
OpStore %20 %40
OpReturn
%39 = OpLabel
%41 = OpCompositeExtract %7 %13 0
%42 = OpVectorShuffle %28 %41 %41 0 1 2
%43 = OpFSub %28 %29 %42
%44 = OpCompositeExtract %7 %13 0
%45 = OpCompositeExtract %5 %44 3
%46 = OpCompositeConstruct %7 %43 %45
OpStore %20 %46
OpReturn
%37 = OpLabel
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,633 @@
; SPIR-V
; Version: 1.0
; Generator: rspirv
; Bound: 511
OpCapability Shader
OpCapability Float16
OpCapability StorageBuffer16BitAccess
OpCapability UniformAndStorageBuffer16BitAccess
OpCapability StorageInputOutput16
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpExtension "SPV_KHR_16bit_storage"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %499 "main"
OpExecutionMode %499 LocalSize 1 1 1
OpMemberDecorate %19 0 Offset 0
OpMemberDecorate %19 1 Offset 4
OpMemberDecorate %19 2 Offset 8
OpMemberDecorate %19 3 Offset 12
OpMemberDecorate %19 4 Offset 16
OpMemberDecorate %19 5 Offset 24
OpMemberDecorate %19 6 Offset 32
OpMemberDecorate %19 7 Offset 40
OpMemberDecorate %19 8 Offset 44
OpMemberDecorate %19 8 ColMajor
OpMemberDecorate %19 8 MatrixStride 4
OpMemberDecorate %19 9 Offset 56
OpMemberDecorate %19 9 ColMajor
OpMemberDecorate %19 9 MatrixStride 8
OpMemberDecorate %19 10 Offset 72
OpMemberDecorate %19 10 ColMajor
OpMemberDecorate %19 10 MatrixStride 8
OpMemberDecorate %19 11 Offset 88
OpMemberDecorate %19 11 ColMajor
OpMemberDecorate %19 11 MatrixStride 4
OpMemberDecorate %19 12 Offset 104
OpMemberDecorate %19 12 ColMajor
OpMemberDecorate %19 12 MatrixStride 8
OpMemberDecorate %19 13 Offset 128
OpMemberDecorate %19 13 ColMajor
OpMemberDecorate %19 13 MatrixStride 8
OpMemberDecorate %19 14 Offset 152
OpMemberDecorate %19 14 ColMajor
OpMemberDecorate %19 14 MatrixStride 4
OpMemberDecorate %19 15 Offset 168
OpMemberDecorate %19 15 ColMajor
OpMemberDecorate %19 15 MatrixStride 8
OpMemberDecorate %19 16 Offset 200
OpMemberDecorate %19 16 ColMajor
OpMemberDecorate %19 16 MatrixStride 8
OpDecorate %20 ArrayStride 2
OpMemberDecorate %22 0 Offset 0
OpMemberDecorate %23 0 Offset 0
OpMemberDecorate %23 1 Offset 2
OpMemberDecorate %23 2 Offset 8
OpMemberDecorate %23 3 Offset 14
OpMemberDecorate %23 4 Offset 16
OpMemberDecorate %23 5 Offset 20
OpDecorate %28 DescriptorSet 0
OpDecorate %28 Binding 0
OpDecorate %29 Block
OpMemberDecorate %29 0 Offset 0
OpDecorate %31 NonWritable
OpDecorate %31 DescriptorSet 0
OpDecorate %31 Binding 1
OpDecorate %32 Block
OpMemberDecorate %32 0 Offset 0
OpDecorate %34 NonWritable
OpDecorate %34 DescriptorSet 0
OpDecorate %34 Binding 2
OpDecorate %35 Block
OpMemberDecorate %35 0 Offset 0
OpDecorate %37 DescriptorSet 0
OpDecorate %37 Binding 3
OpDecorate %38 Block
OpMemberDecorate %38 0 Offset 0
OpDecorate %40 DescriptorSet 0
OpDecorate %40 Binding 4
OpDecorate %41 Block
OpMemberDecorate %41 0 Offset 0
%2 = OpTypeVoid
%3 = OpTypeFloat 16
%4 = OpTypeInt 32 0
%5 = OpTypeInt 32 1
%6 = OpTypeFloat 32
%7 = OpTypeVector %3 2
%8 = OpTypeVector %3 3
%9 = OpTypeVector %3 4
%10 = OpTypeMatrix %7 2
%11 = OpTypeMatrix %8 2
%12 = OpTypeMatrix %9 2
%13 = OpTypeMatrix %7 3
%14 = OpTypeMatrix %8 3
%15 = OpTypeMatrix %9 3
%16 = OpTypeMatrix %7 4
%17 = OpTypeMatrix %8 4
%18 = OpTypeMatrix %9 4
%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18
%21 = OpConstant %4 2
%20 = OpTypeArray %3 %21
%22 = OpTypeStruct %20
%23 = OpTypeStruct %3 %3 %8 %3 %3 %4
%24 = OpConstant %3 2.1524e-41
%25 = OpConstant %3 2.7121e-41
%27 = OpTypePointer Private %3
%26 = OpVariable %27 Private %24
%29 = OpTypeStruct %19
%30 = OpTypePointer Uniform %29
%28 = OpVariable %30 Uniform
%32 = OpTypeStruct %19
%33 = OpTypePointer StorageBuffer %32
%31 = OpVariable %33 StorageBuffer
%35 = OpTypeStruct %22
%36 = OpTypePointer StorageBuffer %35
%34 = OpVariable %36 StorageBuffer
%38 = OpTypeStruct %19
%39 = OpTypePointer StorageBuffer %38
%37 = OpVariable %39 StorageBuffer
%41 = OpTypeStruct %22
%42 = OpTypePointer StorageBuffer %41
%40 = OpVariable %42 StorageBuffer
%46 = OpTypeFunction %3 %3
%47 = OpTypePointer Uniform %19
%48 = OpConstant %4 0
%50 = OpTypePointer StorageBuffer %19
%52 = OpTypePointer StorageBuffer %22
%56 = OpConstant %3 8.8991e-41
%57 = OpConstant %3 2.4753e-41
%59 = OpTypePointer Function %3
%67 = OpTypePointer Uniform %6
%76 = OpTypePointer Uniform %3
%77 = OpConstant %4 3
%84 = OpTypePointer StorageBuffer %3
%91 = OpTypePointer StorageBuffer %7
%92 = OpTypePointer Uniform %7
%93 = OpConstant %4 4
%100 = OpTypePointer StorageBuffer %8
%101 = OpTypePointer Uniform %8
%102 = OpConstant %4 5
%109 = OpTypePointer StorageBuffer %9
%110 = OpTypePointer Uniform %9
%111 = OpConstant %4 6
%118 = OpTypePointer StorageBuffer %10
%119 = OpTypePointer Uniform %10
%120 = OpConstant %4 8
%133 = OpTypePointer StorageBuffer %11
%134 = OpTypePointer Uniform %11
%135 = OpConstant %4 9
%148 = OpTypePointer StorageBuffer %12
%149 = OpTypePointer Uniform %12
%150 = OpConstant %4 10
%163 = OpTypePointer StorageBuffer %13
%164 = OpTypePointer Uniform %13
%165 = OpConstant %4 11
%181 = OpTypePointer StorageBuffer %14
%182 = OpTypePointer Uniform %14
%183 = OpConstant %4 12
%199 = OpTypePointer StorageBuffer %15
%200 = OpTypePointer Uniform %15
%201 = OpConstant %4 13
%217 = OpTypePointer StorageBuffer %16
%218 = OpTypePointer Uniform %16
%219 = OpConstant %4 14
%238 = OpTypePointer StorageBuffer %17
%239 = OpTypePointer Uniform %17
%240 = OpConstant %4 15
%259 = OpTypePointer StorageBuffer %18
%260 = OpTypePointer Uniform %18
%261 = OpConstant %4 16
%280 = OpTypePointer StorageBuffer %20
%319 = OpTypeVector %6 2
%325 = OpTypeVector %6 3
%331 = OpTypeVector %6 4
%337 = OpTypeMatrix %319 2
%351 = OpTypeMatrix %325 2
%365 = OpTypeMatrix %331 2
%379 = OpTypeMatrix %319 3
%397 = OpTypeMatrix %325 3
%415 = OpTypeMatrix %331 3
%433 = OpTypeMatrix %319 4
%455 = OpTypeMatrix %325 4
%477 = OpTypeMatrix %331 4
%500 = OpTypeFunction %2
%506 = OpConstant %3 2.2959e-41
%509 = OpConstant %4 7
%45 = OpFunction %3 None %46
%44 = OpFunctionParameter %3
%43 = OpLabel
%58 = OpVariable %59 Function %25
%49 = OpAccessChain %47 %28 %48
%51 = OpAccessChain %50 %31 %48
%53 = OpAccessChain %52 %34 %48
%54 = OpAccessChain %50 %37 %48
%55 = OpAccessChain %52 %40 %48
OpBranch %60
%60 = OpLabel
%61 = OpLoad %3 %58
%62 = OpFAdd %3 %61 %56
OpStore %58 %62
%63 = OpLoad %3 %58
%64 = OpFAdd %3 %63 %57
%65 = OpLoad %3 %58
%66 = OpFAdd %3 %65 %64
OpStore %58 %66
%68 = OpAccessChain %67 %49 %21
%69 = OpLoad %6 %68
%70 = OpLoad %3 %58
%71 = OpFConvert %6 %70
%72 = OpFAdd %6 %69 %71
%73 = OpFConvert %3 %72
%74 = OpLoad %3 %58
%75 = OpFAdd %3 %74 %73
OpStore %58 %75
%78 = OpAccessChain %76 %49 %77
%79 = OpLoad %3 %78
%80 = OpCompositeConstruct %8 %79 %79 %79
%81 = OpCompositeExtract %3 %80 2
%82 = OpLoad %3 %58
%83 = OpFAdd %3 %82 %81
OpStore %58 %83
%85 = OpAccessChain %76 %49 %77
%86 = OpLoad %3 %85
%87 = OpAccessChain %84 %51 %77
%88 = OpLoad %3 %87
%89 = OpFAdd %3 %86 %88
%90 = OpAccessChain %84 %54 %77
OpStore %90 %89
%94 = OpAccessChain %92 %49 %93
%95 = OpLoad %7 %94
%96 = OpAccessChain %91 %51 %93
%97 = OpLoad %7 %96
%98 = OpFAdd %7 %95 %97
%99 = OpAccessChain %91 %54 %93
OpStore %99 %98
%103 = OpAccessChain %101 %49 %102
%104 = OpLoad %8 %103
%105 = OpAccessChain %100 %51 %102
%106 = OpLoad %8 %105
%107 = OpFAdd %8 %104 %106
%108 = OpAccessChain %100 %54 %102
OpStore %108 %107
%112 = OpAccessChain %110 %49 %111
%113 = OpLoad %9 %112
%114 = OpAccessChain %109 %51 %111
%115 = OpLoad %9 %114
%116 = OpFAdd %9 %113 %115
%117 = OpAccessChain %109 %54 %111
OpStore %117 %116
%121 = OpAccessChain %119 %49 %120
%122 = OpLoad %10 %121
%123 = OpAccessChain %118 %51 %120
%124 = OpLoad %10 %123
%126 = OpCompositeExtract %7 %122 0
%127 = OpCompositeExtract %7 %124 0
%128 = OpFAdd %7 %126 %127
%129 = OpCompositeExtract %7 %122 1
%130 = OpCompositeExtract %7 %124 1
%131 = OpFAdd %7 %129 %130
%125 = OpCompositeConstruct %10 %128 %131
%132 = OpAccessChain %118 %54 %120
OpStore %132 %125
%136 = OpAccessChain %134 %49 %135
%137 = OpLoad %11 %136
%138 = OpAccessChain %133 %51 %135
%139 = OpLoad %11 %138
%141 = OpCompositeExtract %8 %137 0
%142 = OpCompositeExtract %8 %139 0
%143 = OpFAdd %8 %141 %142
%144 = OpCompositeExtract %8 %137 1
%145 = OpCompositeExtract %8 %139 1
%146 = OpFAdd %8 %144 %145
%140 = OpCompositeConstruct %11 %143 %146
%147 = OpAccessChain %133 %54 %135
OpStore %147 %140
%151 = OpAccessChain %149 %49 %150
%152 = OpLoad %12 %151
%153 = OpAccessChain %148 %51 %150
%154 = OpLoad %12 %153
%156 = OpCompositeExtract %9 %152 0
%157 = OpCompositeExtract %9 %154 0
%158 = OpFAdd %9 %156 %157
%159 = OpCompositeExtract %9 %152 1
%160 = OpCompositeExtract %9 %154 1
%161 = OpFAdd %9 %159 %160
%155 = OpCompositeConstruct %12 %158 %161
%162 = OpAccessChain %148 %54 %150
OpStore %162 %155
%166 = OpAccessChain %164 %49 %165
%167 = OpLoad %13 %166
%168 = OpAccessChain %163 %51 %165
%169 = OpLoad %13 %168
%171 = OpCompositeExtract %7 %167 0
%172 = OpCompositeExtract %7 %169 0
%173 = OpFAdd %7 %171 %172
%174 = OpCompositeExtract %7 %167 1
%175 = OpCompositeExtract %7 %169 1
%176 = OpFAdd %7 %174 %175
%177 = OpCompositeExtract %7 %167 2
%178 = OpCompositeExtract %7 %169 2
%179 = OpFAdd %7 %177 %178
%170 = OpCompositeConstruct %13 %173 %176 %179
%180 = OpAccessChain %163 %54 %165
OpStore %180 %170
%184 = OpAccessChain %182 %49 %183
%185 = OpLoad %14 %184
%186 = OpAccessChain %181 %51 %183
%187 = OpLoad %14 %186
%189 = OpCompositeExtract %8 %185 0
%190 = OpCompositeExtract %8 %187 0
%191 = OpFAdd %8 %189 %190
%192 = OpCompositeExtract %8 %185 1
%193 = OpCompositeExtract %8 %187 1
%194 = OpFAdd %8 %192 %193
%195 = OpCompositeExtract %8 %185 2
%196 = OpCompositeExtract %8 %187 2
%197 = OpFAdd %8 %195 %196
%188 = OpCompositeConstruct %14 %191 %194 %197
%198 = OpAccessChain %181 %54 %183
OpStore %198 %188
%202 = OpAccessChain %200 %49 %201
%203 = OpLoad %15 %202
%204 = OpAccessChain %199 %51 %201
%205 = OpLoad %15 %204
%207 = OpCompositeExtract %9 %203 0
%208 = OpCompositeExtract %9 %205 0
%209 = OpFAdd %9 %207 %208
%210 = OpCompositeExtract %9 %203 1
%211 = OpCompositeExtract %9 %205 1
%212 = OpFAdd %9 %210 %211
%213 = OpCompositeExtract %9 %203 2
%214 = OpCompositeExtract %9 %205 2
%215 = OpFAdd %9 %213 %214
%206 = OpCompositeConstruct %15 %209 %212 %215
%216 = OpAccessChain %199 %54 %201
OpStore %216 %206
%220 = OpAccessChain %218 %49 %219
%221 = OpLoad %16 %220
%222 = OpAccessChain %217 %51 %219
%223 = OpLoad %16 %222
%225 = OpCompositeExtract %7 %221 0
%226 = OpCompositeExtract %7 %223 0
%227 = OpFAdd %7 %225 %226
%228 = OpCompositeExtract %7 %221 1
%229 = OpCompositeExtract %7 %223 1
%230 = OpFAdd %7 %228 %229
%231 = OpCompositeExtract %7 %221 2
%232 = OpCompositeExtract %7 %223 2
%233 = OpFAdd %7 %231 %232
%234 = OpCompositeExtract %7 %221 3
%235 = OpCompositeExtract %7 %223 3
%236 = OpFAdd %7 %234 %235
%224 = OpCompositeConstruct %16 %227 %230 %233 %236
%237 = OpAccessChain %217 %54 %219
OpStore %237 %224
%241 = OpAccessChain %239 %49 %240
%242 = OpLoad %17 %241
%243 = OpAccessChain %238 %51 %240
%244 = OpLoad %17 %243
%246 = OpCompositeExtract %8 %242 0
%247 = OpCompositeExtract %8 %244 0
%248 = OpFAdd %8 %246 %247
%249 = OpCompositeExtract %8 %242 1
%250 = OpCompositeExtract %8 %244 1
%251 = OpFAdd %8 %249 %250
%252 = OpCompositeExtract %8 %242 2
%253 = OpCompositeExtract %8 %244 2
%254 = OpFAdd %8 %252 %253
%255 = OpCompositeExtract %8 %242 3
%256 = OpCompositeExtract %8 %244 3
%257 = OpFAdd %8 %255 %256
%245 = OpCompositeConstruct %17 %248 %251 %254 %257
%258 = OpAccessChain %238 %54 %240
OpStore %258 %245
%262 = OpAccessChain %260 %49 %261
%263 = OpLoad %18 %262
%264 = OpAccessChain %259 %51 %261
%265 = OpLoad %18 %264
%267 = OpCompositeExtract %9 %263 0
%268 = OpCompositeExtract %9 %265 0
%269 = OpFAdd %9 %267 %268
%270 = OpCompositeExtract %9 %263 1
%271 = OpCompositeExtract %9 %265 1
%272 = OpFAdd %9 %270 %271
%273 = OpCompositeExtract %9 %263 2
%274 = OpCompositeExtract %9 %265 2
%275 = OpFAdd %9 %273 %274
%276 = OpCompositeExtract %9 %263 3
%277 = OpCompositeExtract %9 %265 3
%278 = OpFAdd %9 %276 %277
%266 = OpCompositeConstruct %18 %269 %272 %275 %278
%279 = OpAccessChain %259 %54 %261
OpStore %279 %266
%281 = OpAccessChain %280 %53 %48
%282 = OpLoad %20 %281
%283 = OpAccessChain %280 %55 %48
OpStore %283 %282
%284 = OpLoad %3 %58
%285 = OpExtInst %3 %1 FAbs %284
%286 = OpLoad %3 %58
%287 = OpFAdd %3 %286 %285
OpStore %58 %287
%288 = OpLoad %3 %58
%289 = OpLoad %3 %58
%290 = OpLoad %3 %58
%291 = OpExtInst %3 %1 FClamp %288 %289 %290
%292 = OpLoad %3 %58
%293 = OpFAdd %3 %292 %291
OpStore %58 %293
%294 = OpLoad %3 %58
%295 = OpCompositeConstruct %7 %294 %294
%296 = OpLoad %3 %58
%297 = OpCompositeConstruct %7 %296 %296
%298 = OpDot %3 %295 %297
%299 = OpLoad %3 %58
%300 = OpFAdd %3 %299 %298
OpStore %58 %300
%301 = OpLoad %3 %58
%302 = OpLoad %3 %58
%303 = OpExtInst %3 %1 FMax %301 %302
%304 = OpLoad %3 %58
%305 = OpFAdd %3 %304 %303
OpStore %58 %305
%306 = OpLoad %3 %58
%307 = OpLoad %3 %58
%308 = OpExtInst %3 %1 FMin %306 %307
%309 = OpLoad %3 %58
%310 = OpFAdd %3 %309 %308
OpStore %58 %310
%311 = OpLoad %3 %58
%312 = OpExtInst %3 %1 FSign %311
%313 = OpLoad %3 %58
%314 = OpFAdd %3 %313 %312
OpStore %58 %314
%315 = OpLoad %3 %58
%316 = OpFAdd %3 %315 %24
OpStore %58 %316
%317 = OpAccessChain %92 %49 %93
%318 = OpLoad %7 %317
%320 = OpFConvert %319 %318
%321 = OpFConvert %7 %320
%322 = OpAccessChain %91 %54 %93
OpStore %322 %321
%323 = OpAccessChain %101 %49 %102
%324 = OpLoad %8 %323
%326 = OpFConvert %325 %324
%327 = OpFConvert %8 %326
%328 = OpAccessChain %100 %54 %102
OpStore %328 %327
%329 = OpAccessChain %110 %49 %111
%330 = OpLoad %9 %329
%332 = OpFConvert %331 %330
%333 = OpFConvert %9 %332
%334 = OpAccessChain %109 %54 %111
OpStore %334 %333
%335 = OpAccessChain %119 %49 %120
%336 = OpLoad %10 %335
%338 = OpCompositeExtract %7 %336 0
%339 = OpFConvert %319 %338
%340 = OpCompositeExtract %7 %336 1
%341 = OpFConvert %319 %340
%342 = OpCompositeConstruct %337 %339 %341
%343 = OpCompositeExtract %319 %342 0
%344 = OpFConvert %7 %343
%345 = OpCompositeExtract %319 %342 1
%346 = OpFConvert %7 %345
%347 = OpCompositeConstruct %10 %344 %346
%348 = OpAccessChain %118 %54 %120
OpStore %348 %347
%349 = OpAccessChain %134 %49 %135
%350 = OpLoad %11 %349
%352 = OpCompositeExtract %8 %350 0
%353 = OpFConvert %325 %352
%354 = OpCompositeExtract %8 %350 1
%355 = OpFConvert %325 %354
%356 = OpCompositeConstruct %351 %353 %355
%357 = OpCompositeExtract %325 %356 0
%358 = OpFConvert %8 %357
%359 = OpCompositeExtract %325 %356 1
%360 = OpFConvert %8 %359
%361 = OpCompositeConstruct %11 %358 %360
%362 = OpAccessChain %133 %54 %135
OpStore %362 %361
%363 = OpAccessChain %149 %49 %150
%364 = OpLoad %12 %363
%366 = OpCompositeExtract %9 %364 0
%367 = OpFConvert %331 %366
%368 = OpCompositeExtract %9 %364 1
%369 = OpFConvert %331 %368
%370 = OpCompositeConstruct %365 %367 %369
%371 = OpCompositeExtract %331 %370 0
%372 = OpFConvert %9 %371
%373 = OpCompositeExtract %331 %370 1
%374 = OpFConvert %9 %373
%375 = OpCompositeConstruct %12 %372 %374
%376 = OpAccessChain %148 %54 %150
OpStore %376 %375
%377 = OpAccessChain %164 %49 %165
%378 = OpLoad %13 %377
%380 = OpCompositeExtract %7 %378 0
%381 = OpFConvert %319 %380
%382 = OpCompositeExtract %7 %378 1
%383 = OpFConvert %319 %382
%384 = OpCompositeExtract %7 %378 2
%385 = OpFConvert %319 %384
%386 = OpCompositeConstruct %379 %381 %383 %385
%387 = OpCompositeExtract %319 %386 0
%388 = OpFConvert %7 %387
%389 = OpCompositeExtract %319 %386 1
%390 = OpFConvert %7 %389
%391 = OpCompositeExtract %319 %386 2
%392 = OpFConvert %7 %391
%393 = OpCompositeConstruct %13 %388 %390 %392
%394 = OpAccessChain %163 %54 %165
OpStore %394 %393
%395 = OpAccessChain %182 %49 %183
%396 = OpLoad %14 %395
%398 = OpCompositeExtract %8 %396 0
%399 = OpFConvert %325 %398
%400 = OpCompositeExtract %8 %396 1
%401 = OpFConvert %325 %400
%402 = OpCompositeExtract %8 %396 2
%403 = OpFConvert %325 %402
%404 = OpCompositeConstruct %397 %399 %401 %403
%405 = OpCompositeExtract %325 %404 0
%406 = OpFConvert %8 %405
%407 = OpCompositeExtract %325 %404 1
%408 = OpFConvert %8 %407
%409 = OpCompositeExtract %325 %404 2
%410 = OpFConvert %8 %409
%411 = OpCompositeConstruct %14 %406 %408 %410
%412 = OpAccessChain %181 %54 %183
OpStore %412 %411
%413 = OpAccessChain %200 %49 %201
%414 = OpLoad %15 %413
%416 = OpCompositeExtract %9 %414 0
%417 = OpFConvert %331 %416
%418 = OpCompositeExtract %9 %414 1
%419 = OpFConvert %331 %418
%420 = OpCompositeExtract %9 %414 2
%421 = OpFConvert %331 %420
%422 = OpCompositeConstruct %415 %417 %419 %421
%423 = OpCompositeExtract %331 %422 0
%424 = OpFConvert %9 %423
%425 = OpCompositeExtract %331 %422 1
%426 = OpFConvert %9 %425
%427 = OpCompositeExtract %331 %422 2
%428 = OpFConvert %9 %427
%429 = OpCompositeConstruct %15 %424 %426 %428
%430 = OpAccessChain %199 %54 %201
OpStore %430 %429
%431 = OpAccessChain %218 %49 %219
%432 = OpLoad %16 %431
%434 = OpCompositeExtract %7 %432 0
%435 = OpFConvert %319 %434
%436 = OpCompositeExtract %7 %432 1
%437 = OpFConvert %319 %436
%438 = OpCompositeExtract %7 %432 2
%439 = OpFConvert %319 %438
%440 = OpCompositeExtract %7 %432 3
%441 = OpFConvert %319 %440
%442 = OpCompositeConstruct %433 %435 %437 %439 %441
%443 = OpCompositeExtract %319 %442 0
%444 = OpFConvert %7 %443
%445 = OpCompositeExtract %319 %442 1
%446 = OpFConvert %7 %445
%447 = OpCompositeExtract %319 %442 2
%448 = OpFConvert %7 %447
%449 = OpCompositeExtract %319 %442 3
%450 = OpFConvert %7 %449
%451 = OpCompositeConstruct %16 %444 %446 %448 %450
%452 = OpAccessChain %217 %54 %219
OpStore %452 %451
%453 = OpAccessChain %239 %49 %240
%454 = OpLoad %17 %453
%456 = OpCompositeExtract %8 %454 0
%457 = OpFConvert %325 %456
%458 = OpCompositeExtract %8 %454 1
%459 = OpFConvert %325 %458
%460 = OpCompositeExtract %8 %454 2
%461 = OpFConvert %325 %460
%462 = OpCompositeExtract %8 %454 3
%463 = OpFConvert %325 %462
%464 = OpCompositeConstruct %455 %457 %459 %461 %463
%465 = OpCompositeExtract %325 %464 0
%466 = OpFConvert %8 %465
%467 = OpCompositeExtract %325 %464 1
%468 = OpFConvert %8 %467
%469 = OpCompositeExtract %325 %464 2
%470 = OpFConvert %8 %469
%471 = OpCompositeExtract %325 %464 3
%472 = OpFConvert %8 %471
%473 = OpCompositeConstruct %17 %466 %468 %470 %472
%474 = OpAccessChain %238 %54 %240
OpStore %474 %473
%475 = OpAccessChain %260 %49 %261
%476 = OpLoad %18 %475
%478 = OpCompositeExtract %9 %476 0
%479 = OpFConvert %331 %478
%480 = OpCompositeExtract %9 %476 1
%481 = OpFConvert %331 %480
%482 = OpCompositeExtract %9 %476 2
%483 = OpFConvert %331 %482
%484 = OpCompositeExtract %9 %476 3
%485 = OpFConvert %331 %484
%486 = OpCompositeConstruct %477 %479 %481 %483 %485
%487 = OpCompositeExtract %331 %486 0
%488 = OpFConvert %9 %487
%489 = OpCompositeExtract %331 %486 1
%490 = OpFConvert %9 %489
%491 = OpCompositeExtract %331 %486 2
%492 = OpFConvert %9 %491
%493 = OpCompositeExtract %331 %486 3
%494 = OpFConvert %9 %493
%495 = OpCompositeConstruct %18 %488 %490 %492 %494
%496 = OpAccessChain %259 %54 %261
OpStore %496 %495
%497 = OpLoad %3 %58
OpReturnValue %497
OpFunctionEnd
%499 = OpFunction %2 None %500
%498 = OpLabel
%501 = OpAccessChain %47 %28 %48
%502 = OpAccessChain %50 %31 %48
%503 = OpAccessChain %52 %34 %48
%504 = OpAccessChain %50 %37 %48
%505 = OpAccessChain %52 %40 %48
OpBranch %507
%507 = OpLabel
%508 = OpFunctionCall %3 %45 %506
%510 = OpAccessChain %84 %504 %509
OpStore %510 %508
OpReturn
OpFunctionEnd

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
struct PushConstants {
index: u32,
double: vec2<f64>,
double: vec2<f32>,
}
struct FragmentIn {

View File

@@ -0,0 +1,47 @@
enable f16;
struct A {
a_1_: f16,
a_vec2_: vec2<f16>,
a_vec3_: vec3<f16>,
a_vec4_: vec4<f16>,
}
struct B {
b_1_: f16,
b_vec2_: vec2<f16>,
b_vec3_: vec3<f16>,
b_vec4_: vec4<f16>,
b_mat2_: mat2x2<f16>,
b_mat2x3_: mat2x3<f16>,
b_mat2x4_: mat2x4<f16>,
b_mat3x2_: mat3x2<f16>,
b_mat3_: mat3x3<f16>,
b_mat3x4_: mat3x4<f16>,
b_mat4x2_: mat4x2<f16>,
b_mat4x3_: mat4x3<f16>,
b_mat4_: mat4x4<f16>,
}
@group(0) @binding(0)
var<uniform> global: A;
@group(0) @binding(1)
var<storage, read_write> global_1: B;
fn main_1() {
let _e16 = global.a_1_;
global_1.b_1_ = _e16;
let _e17 = global.a_vec2_;
global_1.b_vec2_ = _e17;
let _e18 = global.a_vec3_;
global_1.b_vec3_ = _e18;
let _e19 = global.a_vec4_;
global_1.b_vec4_ = _e19;
return;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
main_1();
return;
}

View File

@@ -0,0 +1,46 @@
enable f16;
struct B {
b_1_: f16,
b_vec2_: vec2<f16>,
b_vec3_: vec3<f16>,
b_vec4_: vec4<f16>,
b_mat2_: mat2x2<f16>,
b_mat2x3_: mat2x3<f16>,
b_mat2x4_: mat2x4<f16>,
b_mat3x2_: mat3x2<f16>,
b_mat3_: mat3x3<f16>,
b_mat3x4_: mat3x4<f16>,
b_mat4x2_: mat4x2<f16>,
b_mat4x3_: mat4x3<f16>,
b_mat4_: mat4x4<f16>,
}
struct A {
a_1_: f16,
a_vec2_: vec2<f16>,
a_vec3_: vec3<f16>,
a_vec4_: vec4<f16>,
}
@group(0) @binding(1)
var<storage, read_write> unnamed: B;
@group(0) @binding(0)
var<uniform> unnamed_1: A;
fn main_1() {
let _e3 = unnamed_1.a_1_;
unnamed.b_1_ = _e3;
let _e6 = unnamed_1.a_vec2_;
unnamed.b_vec2_ = _e6;
let _e9 = unnamed_1.a_vec3_;
unnamed.b_vec3_ = _e9;
let _e12 = unnamed_1.a_vec4_;
unnamed.b_vec4_ = _e12;
return;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
main_1();
}

View File

@@ -0,0 +1,167 @@
enable f16;
struct UniformCompatible {
val_u32_: u32,
val_i32_: i32,
val_f32_: f32,
val_f16_: f16,
val_f16_2_: vec2<f16>,
val_f16_3_: vec3<f16>,
val_f16_4_: vec4<f16>,
final_value: f16,
val_mat2x2_: mat2x2<f16>,
val_mat2x3_: mat2x3<f16>,
val_mat2x4_: mat2x4<f16>,
val_mat3x2_: mat3x2<f16>,
val_mat3x3_: mat3x3<f16>,
val_mat3x4_: mat3x4<f16>,
val_mat4x2_: mat4x2<f16>,
val_mat4x3_: mat4x3<f16>,
val_mat4x4_: mat4x4<f16>,
}
struct StorageCompatible {
val_f16_array_2_: array<f16, 2>,
}
struct LayoutTest {
scalar1_: f16,
scalar2_: f16,
v3_: vec3<f16>,
tuck_in: f16,
scalar4_: f16,
larger: u32,
}
const constant_variable: f16 = 15.203125h;
var<private> private_variable: f16 = 1h;
@group(0) @binding(0)
var<uniform> input_uniform: UniformCompatible;
@group(0) @binding(1)
var<storage> input_storage: UniformCompatible;
@group(0) @binding(2)
var<storage> input_arrays: StorageCompatible;
@group(0) @binding(3)
var<storage, read_write> output: UniformCompatible;
@group(0) @binding(4)
var<storage, read_write> output_arrays: StorageCompatible;
fn f16_function(x: f16) -> f16 {
var val: f16 = 15.203125h;
let _e4 = val;
val = (_e4 + -33344h);
let _e6 = val;
let _e9 = val;
val = (_e9 + (_e6 + 5h));
let _e13 = input_uniform.val_f32_;
let _e14 = val;
let _e18 = val;
val = (_e18 + f16((_e13 + f32(_e14))));
let _e22 = input_uniform.val_f16_;
let _e25 = val;
val = (_e25 + vec3(_e22).z);
let _e31 = input_uniform.val_f16_;
let _e34 = input_storage.val_f16_;
output.val_f16_ = (_e31 + _e34);
let _e40 = input_uniform.val_f16_2_;
let _e43 = input_storage.val_f16_2_;
output.val_f16_2_ = (_e40 + _e43);
let _e49 = input_uniform.val_f16_3_;
let _e52 = input_storage.val_f16_3_;
output.val_f16_3_ = (_e49 + _e52);
let _e58 = input_uniform.val_f16_4_;
let _e61 = input_storage.val_f16_4_;
output.val_f16_4_ = (_e58 + _e61);
let _e67 = input_uniform.val_mat2x2_;
let _e70 = input_storage.val_mat2x2_;
output.val_mat2x2_ = (_e67 + _e70);
let _e76 = input_uniform.val_mat2x3_;
let _e79 = input_storage.val_mat2x3_;
output.val_mat2x3_ = (_e76 + _e79);
let _e85 = input_uniform.val_mat2x4_;
let _e88 = input_storage.val_mat2x4_;
output.val_mat2x4_ = (_e85 + _e88);
let _e94 = input_uniform.val_mat3x2_;
let _e97 = input_storage.val_mat3x2_;
output.val_mat3x2_ = (_e94 + _e97);
let _e103 = input_uniform.val_mat3x3_;
let _e106 = input_storage.val_mat3x3_;
output.val_mat3x3_ = (_e103 + _e106);
let _e112 = input_uniform.val_mat3x4_;
let _e115 = input_storage.val_mat3x4_;
output.val_mat3x4_ = (_e112 + _e115);
let _e121 = input_uniform.val_mat4x2_;
let _e124 = input_storage.val_mat4x2_;
output.val_mat4x2_ = (_e121 + _e124);
let _e130 = input_uniform.val_mat4x3_;
let _e133 = input_storage.val_mat4x3_;
output.val_mat4x3_ = (_e130 + _e133);
let _e139 = input_uniform.val_mat4x4_;
let _e142 = input_storage.val_mat4x4_;
output.val_mat4x4_ = (_e139 + _e142);
let _e148 = input_arrays.val_f16_array_2_;
output_arrays.val_f16_array_2_ = _e148;
let _e149 = val;
let _e151 = val;
val = (_e151 + abs(_e149));
let _e153 = val;
let _e154 = val;
let _e155 = val;
let _e157 = val;
val = (_e157 + clamp(_e153, _e154, _e155));
let _e159 = val;
let _e161 = val;
let _e164 = val;
val = (_e164 + dot(vec2(_e159), vec2(_e161)));
let _e166 = val;
let _e167 = val;
let _e169 = val;
val = (_e169 + max(_e166, _e167));
let _e171 = val;
let _e172 = val;
let _e174 = val;
val = (_e174 + min(_e171, _e172));
let _e176 = val;
let _e178 = val;
val = (_e178 + sign(_e176));
let _e181 = val;
val = (_e181 + 1h);
let _e185 = input_uniform.val_f16_2_;
let float_vec2_ = vec2<f32>(_e185);
output.val_f16_2_ = vec2<f16>(float_vec2_);
let _e192 = input_uniform.val_f16_3_;
let float_vec3_ = vec3<f32>(_e192);
output.val_f16_3_ = vec3<f16>(float_vec3_);
let _e199 = input_uniform.val_f16_4_;
let float_vec4_ = vec4<f32>(_e199);
output.val_f16_4_ = vec4<f16>(float_vec4_);
let _e208 = input_uniform.val_mat2x2_;
output.val_mat2x2_ = mat2x2<f16>(mat2x2<f32>(_e208));
let _e215 = input_uniform.val_mat2x3_;
output.val_mat2x3_ = mat2x3<f16>(mat2x3<f32>(_e215));
let _e222 = input_uniform.val_mat2x4_;
output.val_mat2x4_ = mat2x4<f16>(mat2x4<f32>(_e222));
let _e229 = input_uniform.val_mat3x2_;
output.val_mat3x2_ = mat3x2<f16>(mat3x2<f32>(_e229));
let _e236 = input_uniform.val_mat3x3_;
output.val_mat3x3_ = mat3x3<f16>(mat3x3<f32>(_e236));
let _e243 = input_uniform.val_mat3x4_;
output.val_mat3x4_ = mat3x4<f16>(mat3x4<f32>(_e243));
let _e250 = input_uniform.val_mat4x2_;
output.val_mat4x2_ = mat4x2<f16>(mat4x2<f32>(_e250));
let _e257 = input_uniform.val_mat4x3_;
output.val_mat4x3_ = mat4x3<f16>(mat4x3<f32>(_e257));
let _e264 = input_uniform.val_mat4x4_;
output.val_mat4x4_ = mat4x4<f16>(mat4x4<f32>(_e264));
let _e267 = val;
return _e267;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
let _e3 = f16_function(2h);
output.final_value = _e3;
return;
}

View File

@@ -208,3 +208,8 @@ fn int64() {
"#,
);
}
#[test]
fn float16() {
require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
}

View File

@@ -8,7 +8,8 @@ use naga::valid::Capabilities;
#[track_caller]
fn check(input: &str, snapshot: &str) {
let output = naga::front::wgsl::parse_str(input)
.expect_err("expected parser error")
.map(|_| panic!("expected parser error, but parsing succeeded!"))
.unwrap_err()
.emit_to_string(input);
if output != snapshot {
for diff in diff::lines(snapshot, &output) {
@@ -22,6 +23,19 @@ fn check(input: &str, snapshot: &str) {
}
}
#[track_caller]
fn check_success(input: &str) {
match naga::front::wgsl::parse_str(input) {
Ok(_) => {}
Err(err) => {
panic!(
"expected success, but parsing failed with:\n{}",
err.emit_to_string(input)
);
}
}
}
#[test]
fn very_negative_integers() {
// wgpu#4492
@@ -827,6 +841,50 @@ fn matrix_constructor_inferred() {
);
}
#[test]
fn float16_requires_enable() {
check(
r#"
const a: f16 = 1.0;
"#,
r#"error: the `f16` enable extension is not enabled
┌─ wgsl:2:22
2 │ const a: f16 = 1.0;
│ ^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
= note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
"#,
);
check(
r#"
const a = 1.0h;
"#,
r#"error: the `f16` enable extension is not enabled
┌─ wgsl:2:23
2 │ const a = 1.0h;
│ ^^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
= note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
"#,
);
}
#[test]
fn multiple_enables_valid() {
check_success(
r#"
enable f16;
enable f16;
const a: f16 = 1.0h;
"#,
);
}
/// Check the result of validating a WGSL program against a pattern.
///
/// Unless you are generating code programmatically, the
@@ -932,6 +990,53 @@ fn int64_capability() {
}
}
#[test]
fn float16_capability() {
check_validation! {
"enable f16; var input: f16;",
"enable f16; var input: vec2<f16>;":
Err(naga::valid::ValidationError::Type {
source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}),
..
})
}
}
#[test]
fn float16_in_push_constant() {
check_validation! {
"enable f16; var<push_constant> input: f16;",
"enable f16; var<push_constant> input: vec2<f16>;",
"enable f16; var<push_constant> input: mat4x4<f16>;",
"enable f16; struct S { a: f16 }; var<push_constant> input: S;",
"enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var<push_constant> input: S2;":
Err(naga::valid::ValidationError::GlobalVariable {
source: naga::valid::GlobalVariableError::InvalidPushConstantType(
naga::valid::PushConstantError::InvalidScalar(
naga::Scalar::F16
)
),
..
}),
naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT
}
}
#[test]
fn float16_in_atomic() {
check_validation! {
"enable f16; var<storage> a: atomic<f16>;":
Err(naga::valid::ValidationError::Type {
source: naga::valid::TypeError::InvalidAtomicWidth(
naga::ScalarKind::Float,
2
),
..
}),
naga::valid::Capabilities::SHADER_FLOAT16
}
}
#[test]
fn invalid_arrays() {
check_validation! {
@@ -1356,11 +1461,13 @@ fn invalid_blend_src() {
@fragment
fn main(@builtin(position) position: vec4<f32>) -> FragmentOutput { return FragmentOutput(vec4(0.0), vec4(0.0)); }
",
r###"error: `dual_source_blending` enable-extension is not enabled
r###"error: the `dual_source_blending` enable extension is not enabled
┌─ wgsl:3:27
3 │ @location(0) @blend_src(0) output0: vec4<f32>,
│ ^^^^^^^^^ the `dual_source_blending` enable-extension is needed for this functionality, but it is not currently enabled
│ ^^^^^^^^^ the `dual_source_blending` "Enable Extension" is needed for this functionality, but it is not currently enabled.
= note: You can enable this extension by adding `enable dual_source_blending;` at the top of the shader, before any other items.
"###,
);

View File

@@ -49,6 +49,7 @@ cfg-if.workspace = true
ctor.workspace = true
futures-lite.workspace = true
glam.workspace = true
half = { workspace = true, features = ["bytemuck"] }
itertools.workspace = true
image.workspace = true
libtest-mimic.workspace = true

View File

@@ -143,8 +143,8 @@ impl ShaderTest {
body,
input_type: String::from("CustomStruct"),
output_type: String::from("array<u32>"),
input_values: bytemuck::cast_slice(input_values).to_vec(),
output_values: vec![bytemuck::cast_slice(output_values).to_vec()],
input_values: bytemuck::pod_collect_to_vec(input_values),
output_values: vec![bytemuck::pod_collect_to_vec(output_values)],
output_comparison_fn: Self::default_comparison_function::<O>,
output_initialization: u32::MAX,
failures: Backends::empty(),

View File

@@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits};
use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
#[gpu_test]
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
// Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
.expect_fail(
FailureCase::backend(wgpu::Backends::VULKAN)
.validation_error("a matrix with stride 8 not satisfying alignment to 16"),
)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Uniform,
create_struct_layout_tests(InputStorageType::Uniform),
)
});
#[gpu_test]
static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_struct_layout_tests(InputStorageType::Storage),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_struct_layout_tests(InputStorageType::PushConstant),
)
});
fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
@@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
tests
}
#[gpu_test]
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
#[gpu_test]
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_64bit_struct_layout_tests(),
)
});
fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
@@ -356,29 +461,10 @@ fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
}
#[gpu_test]
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
// Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
.expect_fail(
FailureCase::backend(wgpu::Backends::VULKAN)
.validation_error("a matrix with stride 8 not satisfying alignment to 16"),
)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Uniform,
create_struct_layout_tests(InputStorageType::Uniform),
)
});
#[gpu_test]
static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_F16)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
@@ -386,34 +472,15 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_struct_layout_tests(InputStorageType::Storage),
create_16bit_struct_layout_test(),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_struct_layout_tests(InputStorageType::PushConstant),
)
});
#[gpu_test]
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.features(Features::SHADER_F16)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
@@ -421,41 +488,138 @@ static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
create_16bit_struct_layout_test(),
)
});
#[gpu_test]
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
let mut tests = Vec::new();
#[gpu_test]
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_64bit_struct_layout_tests(),
)
});
fn f16asu16(f32: f32) -> u16 {
half::f16::from_f32(f32).to_bits()
}
// 16 bit alignment tests
{
let members =
"scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32";
let direct = String::from(
"\
output[0] = u32(input.scalar1);
output[1] = u32(input.scalar2);
output[2] = u32(input.v3.x);
output[3] = u32(input.v3.y);
output[4] = u32(input.v3.z);
output[5] = u32(input.tuck_in);
output[6] = u32(input.scalar4);
output[7] = u32(extractBits(input.larger, 0u, 16u));
output[8] = u32(extractBits(input.larger, 16u, 16u));
",
);
tests.push(ShaderTest::new(
"f16 alignment".into(),
members.into(),
direct,
&[
f16asu16(0.0),
f16asu16(1.0),
f16asu16(2.0),
f16asu16(3.0),
f16asu16(4.0),
f16asu16(5.0),
f16asu16(6.0),
f16asu16(7.0),
f16asu16(8.0),
f16asu16(9.0),
10_u16,
11_u16,
// Some extra values to help debug if the test fails.
12_u16,
13_u16,
14_u16,
15_u16,
16_u16,
17_u16,
18_u16,
19_u16,
20_u16,
],
&[
0, // scalar1
1, // scalar2
4, 5, 6, // v3
7, // tuck_in
8, // scalar4
10, // larger[0..16]
11, // larger[16..32]
],
));
}
// Matrix tests
{
let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h";
let direct = String::from(
"\
output[0] = u32(input.m2[0].x);
output[1] = u32(input.m2[0].y);
output[2] = u32(input.m2[1].x);
output[3] = u32(input.m2[1].y);
output[4] = u32(input.m3[0].x);
output[5] = u32(input.m3[0].y);
output[6] = u32(input.m3[0].z);
output[7] = u32(input.m3[1].x);
output[8] = u32(input.m3[1].y);
output[9] = u32(input.m3[1].z);
output[10] = u32(input.m3[2].x);
output[11] = u32(input.m3[2].y);
output[12] = u32(input.m3[2].z);
output[13] = u32(input.m4[0].x);
output[14] = u32(input.m4[0].y);
output[15] = u32(input.m4[0].z);
output[16] = u32(input.m4[0].w);
output[17] = u32(input.m4[1].x);
output[18] = u32(input.m4[1].y);
output[19] = u32(input.m4[1].z);
output[20] = u32(input.m4[1].w);
output[21] = u32(input.m4[2].x);
output[22] = u32(input.m4[2].y);
output[23] = u32(input.m4[2].z);
output[24] = u32(input.m4[2].w);
output[25] = u32(input.m4[3].x);
output[26] = u32(input.m4[3].y);
output[27] = u32(input.m4[3].z);
output[28] = u32(input.m4[3].w);
",
);
tests.push(ShaderTest::new(
"f16 matrix alignment".into(),
members.into(),
direct,
&(0..32).map(|x| f16asu16(x as f32)).collect::<Vec<_>>(),
&[
0, 1, // m2[0]
2, 3, // m2[1]
//
4, 5, 6, // m3[0]
8, 9, 10, // m3[1]
12, 13, 14, // m3[2]
//
16, 17, 18, 19, // m4[0]
20, 21, 22, 23, // m4[1]
24, 25, 26, 27, // m4[2]
28, 29, 30, 31, // m4[3]
],
));
}
// Insert `enable f16;` header
tests
.into_iter()
.map(|test| test.header("enable f16;".into()))
.collect()
}

View File

@@ -383,6 +383,10 @@ pub fn create_validator(
features.contains(wgt::Features::PUSH_CONSTANTS),
);
caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
caps.set(
Caps::SHADER_FLOAT16,
features.contains(wgt::Features::SHADER_F16),
);
caps.set(
Caps::PRIMITIVE_INDEX,
features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),

View File

@@ -425,6 +425,23 @@ impl super::Adapter {
&& features1.Int64ShaderOps.as_bool(),
);
let float16_supported = {
let mut features4 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4::default();
let hr = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax
ptr::from_mut(&mut features4).cast(),
size_of::<Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4>() as _,
)
};
hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool()
};
features.set(
wgt::Features::SHADER_F16,
shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported,
);
features.set(
wgt::Features::TEXTURE_INT64_ATOMIC,
shader_model >= naga::back::hlsl::ShaderModel::V6_6
@@ -612,7 +629,7 @@ impl crate::Adapter for super::Adapter {
unsafe fn open(
&self,
_features: wgt::Features,
features: wgt::Features,
limits: &wgt::Limits,
memory_hints: &wgt::MemoryHints,
) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
@@ -633,6 +650,7 @@ impl crate::Adapter for super::Adapter {
let device = super::Device::new(
self.device.clone(),
queue.clone(),
features,
limits,
memory_hints,
self.private_caps,

View File

@@ -33,9 +33,11 @@ use crate::{
const NAGA_LOCATION_SEMANTIC: &[u8] = c"LOC".to_bytes();
impl super::Device {
#[allow(clippy::too_many_arguments)]
pub(super) fn new(
raw: Direct3D12::ID3D12Device,
present_queue: Direct3D12::ID3D12CommandQueue,
features: wgt::Features,
limits: &wgt::Limits,
memory_hints: &wgt::MemoryHints,
private_caps: super::PrivateCapabilities,
@@ -178,6 +180,7 @@ impl super::Device {
event: Event::create(false, false)?,
},
private_caps,
features,
shared: Arc::new(shared),
rtv_pool: Mutex::new(rtv_pool),
dsv_pool: Mutex::new(descriptor::CpuPool::new(

View File

@@ -634,6 +634,7 @@ pub struct Device {
present_queue: Direct3D12::ID3D12CommandQueue,
idler: Idler,
private_caps: PrivateCapabilities,
features: wgt::Features,
shared: Arc<DeviceShared>,
// CPU only pools
rtv_pool: Mutex<descriptor::CpuPool>,

View File

@@ -268,7 +268,7 @@ pub(super) fn compile_dxc(
let raw_ep = OPCWSTR::new(raw_ep);
let full_stage = OPCWSTR::new(full_stage);
let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 12>::new_const();
let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 13>::new_const();
if let Some(source_name) = source_name.as_ref() {
compile_args.push(source_name.ptr())
@@ -298,6 +298,10 @@ pub(super) fn compile_dxc(
compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS);
}
if device.features.contains(wgt::Features::SHADER_F16) {
compile_args.push(windows::core::w!("-enable-16bit-types"));
}
let buffer = Dxc::DxcBuffer {
Ptr: source.as_ptr().cast(),
Size: source.len(),

View File

@@ -399,6 +399,7 @@ impl PhysicalDeviceFeatures {
vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
vk::PhysicalDevice16BitStorageFeatures::default()
.storage_buffer16_bit_access(true)
.storage_input_output16(true)
.uniform_and_storage_buffer16_bit_access(true),
))
} else {
@@ -720,7 +721,8 @@ impl PhysicalDeviceFeatures {
F::SHADER_F16,
f16_i8.shader_float16 != 0
&& bit16.storage_buffer16_bit_access != 0
&& bit16.uniform_and_storage_buffer16_bit_access != 0,
&& bit16.uniform_and_storage_buffer16_bit_access != 0
&& bit16.storage_input_output16 != 0,
);
}
@@ -1968,6 +1970,10 @@ impl super::Adapter {
capabilities.push(spv::Capability::Int64);
}
if features.contains(wgt::Features::SHADER_F16) {
capabilities.push(spv::Capability::Float16);
}
if features.intersects(
wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
| wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX

View File

@@ -1340,18 +1340,20 @@ bitflags_array! {
/// This is a web and native feature.
const INDIRECT_FIRST_INSTANCE = WEBGPU_FEATURE_INDIRECT_FIRST_INSTANCE;
/// Allows shaders to acquire the FP16 ability
/// Allows shaders to use 16-bit floating point types. You may use them uniform buffers,
/// storage buffers, and local variables. You may not use them in push constants.
///
/// Note: this is not supported in `naga` yet, only through `spirv-passthrough` right now.
/// In order to use this in WGSL shaders, you must add `enable f16;` to the top of your shader,
/// before any global items.
///
/// Supported Platforms:
/// - Vulkan
/// - Metal
/// - DX12
///
/// This is a web and native feature.
const SHADER_F16 = WEBGPU_FEATURE_SHADER_F16;
/// Allows for usage of textures of format [`TextureFormat::Rg11b10Ufloat`] as a render target
///
/// Supported platforms: