diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index f11dcd7744..59725df3db 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -12,7 +12,7 @@ use super::{ WrappedZeroValue, }, storage::StoreValue, - BackendResult, Error, FragmentEntryPoint, Options, + BackendResult, Error, FragmentEntryPoint, Options, ShaderModel, }; use crate::{ back::{self, Baked}, @@ -3751,33 +3751,48 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { fun @ (Function::Dot4I8Packed | Function::Dot4U8Packed) => { let arg1 = arg1.unwrap(); - write!(self.out, "dot(")?; + if self.options.shader_model >= ShaderModel::V6_4 { + // Intrinsics `dot4add_{i, u}8packed` are available in SM 6.4 and later. + let function_name = match fun { + Function::Dot4I8Packed => "dot4add_i8packed", + Function::Dot4U8Packed => "dot4add_u8packed", + _ => unreachable!(), + }; + write!(self.out, "{function_name}(")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, ", 0)")?; + } else { + // Fall back to a polyfill as `dot4add_u8packed` is not available. + write!(self.out, "dot(")?; - if matches!(fun, Function::Dot4U8Packed) { - write!(self.out, "u")?; - } - write!(self.out, "int4(")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, ", ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 8, ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 16, ")?; - self.write_expr(module, arg, func_ctx)?; - write!(self.out, " >> 24) << 24 >> 24, ")?; + if matches!(fun, Function::Dot4U8Packed) { + write!(self.out, "u")?; + } + write!(self.out, "int4(")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 8, ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 16, ")?; + self.write_expr(module, arg, func_ctx)?; + write!(self.out, " >> 24) << 24 >> 24, ")?; - if matches!(fun, Function::Dot4U8Packed) { - write!(self.out, "u")?; + if matches!(fun, Function::Dot4U8Packed) { + write!(self.out, "u")?; + } + write!(self.out, "int4(")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 8, ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 16, ")?; + self.write_expr(module, arg1, func_ctx)?; + write!(self.out, " >> 24) << 24 >> 24)")?; } - write!(self.out, "int4(")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, ", ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 8, ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 16, ")?; - self.write_expr(module, arg1, func_ctx)?; - write!(self.out, " >> 24) << 24 >> 24)")?; } Function::QuantizeToF16 => { write!(self.out, "f16tof32(f32tof16(")?; diff --git a/naga/tests/in/wgsl/functions-optimized.toml b/naga/tests/in/wgsl/functions-optimized.toml new file mode 100644 index 0000000000..ea9652fe3b --- /dev/null +++ b/naga/tests/in/wgsl/functions-optimized.toml @@ -0,0 +1,6 @@ +# Explicitly turn on optimizations for `dot4I8Packed` and `dot4U8Packed` on HLSL. + +targets = "HLSL" + +[hlsl] +shader_model = "V6_4" diff --git a/naga/tests/in/wgsl/functions-optimized.wgsl b/naga/tests/in/wgsl/functions-optimized.wgsl new file mode 100644 index 0000000000..229357523c --- /dev/null +++ b/naga/tests/in/wgsl/functions-optimized.wgsl @@ -0,0 +1,19 @@ +fn test_packed_integer_dot_product() -> u32 { + let a_5 = 1u; + let b_5 = 2u; + let c_5: i32 = dot4I8Packed(a_5, b_5); + + let a_6 = 3u; + let b_6 = 4u; + let c_6: u32 = dot4U8Packed(a_6, b_6); + + // test baking of arguments + let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6); + let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6); + return c_8; +} + +@compute @workgroup_size(1) +fn main() { + let c = test_packed_integer_dot_product(); +} diff --git a/naga/tests/in/wgsl/functions-unoptimized.toml b/naga/tests/in/wgsl/functions-unoptimized.toml new file mode 100644 index 0000000000..9bd6e0f370 --- /dev/null +++ b/naga/tests/in/wgsl/functions-unoptimized.toml @@ -0,0 +1,6 @@ +# Explicitly turn off optimizations for `dot4I8Packed` and `dot4U8Packed` on HLSL. + +targets = "HLSL" + +[hlsl] +shader_model = "V6_3" diff --git a/naga/tests/in/wgsl/functions-unoptimized.wgsl b/naga/tests/in/wgsl/functions-unoptimized.wgsl new file mode 100644 index 0000000000..229357523c --- /dev/null +++ b/naga/tests/in/wgsl/functions-unoptimized.wgsl @@ -0,0 +1,19 @@ +fn test_packed_integer_dot_product() -> u32 { + let a_5 = 1u; + let b_5 = 2u; + let c_5: i32 = dot4I8Packed(a_5, b_5); + + let a_6 = 3u; + let b_6 = 4u; + let c_6: u32 = dot4U8Packed(a_6, b_6); + + // test baking of arguments + let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6); + let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6); + return c_8; +} + +@compute @workgroup_size(1) +fn main() { + let c = test_packed_integer_dot_product(); +} diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl b/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl new file mode 100644 index 0000000000..2b2d003ea0 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-optimized.hlsl @@ -0,0 +1,19 @@ +uint test_packed_integer_dot_product() +{ + int c_5_ = dot4add_i8packed(1u, 2u, 0); + uint c_6_ = dot4add_u8packed(3u, 4u, 0); + uint _e7 = (5u + c_6_); + uint _e9 = (6u + c_6_); + int c_7_ = dot4add_i8packed(_e7, _e9, 0); + uint _e12 = (7u + c_6_); + uint _e14 = (8u + c_6_); + uint c_8_ = dot4add_u8packed(_e12, _e14, 0); + return c_8_; +} + +[numthreads(1, 1, 1)] +void main() +{ + const uint _e0 = test_packed_integer_dot_product(); + return; +} diff --git a/naga/tests/out/hlsl/wgsl-functions-optimized.ron b/naga/tests/out/hlsl/wgsl-functions-optimized.ron new file mode 100644 index 0000000000..81f3e9b295 --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-optimized.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_4", + ), + ], +) diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl b/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl new file mode 100644 index 0000000000..72156e433c --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-unoptimized.hlsl @@ -0,0 +1,19 @@ +uint test_packed_integer_dot_product() +{ + int c_5_ = dot(int4(1u, 1u >> 8, 1u >> 16, 1u >> 24) << 24 >> 24, int4(2u, 2u >> 8, 2u >> 16, 2u >> 24) << 24 >> 24); + uint c_6_ = dot(uint4(3u, 3u >> 8, 3u >> 16, 3u >> 24) << 24 >> 24, uint4(4u, 4u >> 8, 4u >> 16, 4u >> 24) << 24 >> 24); + uint _e7 = (5u + c_6_); + uint _e9 = (6u + c_6_); + int c_7_ = dot(int4(_e7, _e7 >> 8, _e7 >> 16, _e7 >> 24) << 24 >> 24, int4(_e9, _e9 >> 8, _e9 >> 16, _e9 >> 24) << 24 >> 24); + uint _e12 = (7u + c_6_); + uint _e14 = (8u + c_6_); + uint c_8_ = dot(uint4(_e12, _e12 >> 8, _e12 >> 16, _e12 >> 24) << 24 >> 24, uint4(_e14, _e14 >> 8, _e14 >> 16, _e14 >> 24) << 24 >> 24); + return c_8_; +} + +[numthreads(1, 1, 1)] +void main() +{ + const uint _e0 = test_packed_integer_dot_product(); + return; +} diff --git a/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron new file mode 100644 index 0000000000..f1f510d2dc --- /dev/null +++ b/naga/tests/out/hlsl/wgsl-functions-unoptimized.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_3", + ), + ], +)