From a5c2cef9ab96dc4df235b9d813416a5ec2f503ee Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 26 Jul 2021 00:36:03 -0400 Subject: [PATCH] hlsl: transpose storage matrices --- src/back/hlsl/storage.rs | 29 +++++++++++++++++------------ tests/out/hlsl/access.hlsl | 4 ++-- tests/out/hlsl/shadow.hlsl | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/back/hlsl/storage.rs b/src/back/hlsl/storage.rs index 033bfcf16b..3f296fe2ad 100644 --- a/src/back/hlsl/storage.rs +++ b/src/back/hlsl/storage.rs @@ -1,3 +1,8 @@ +//! Logic related to `ByteAddressBuffer` operations. +//! +//! HLSL backend uses byte address buffers for all storage buffers in IR. +//! Matrices have to be transposed, because HLSL syntax implies row majority. + use super::{ super::{FunctionCtx, INDENT}, BackendResult, Error, @@ -116,22 +121,22 @@ impl super::Writer<'_, W> { } => { write!( self.out, - "{}{}x{}(", + "transpose({}{}x{}(", crate::ScalarKind::Float.to_hlsl_str(width)?, - columns as u8, rows as u8, + columns as u8, )?; - let row_stride = width as u32 * rows as u32; - let iter = (0..columns as u32).map(|i| { + let row_stride = width as u32 * columns as u32; + let iter = (0..rows as u32).map(|i| { let ty_inner = crate::TypeInner::Vector { - size: rows, + size: columns, kind: crate::ScalarKind::Float, width, }; (TypeResolution::Value(ty_inner), i * row_stride) }); self.write_storage_load_sequence(module, var_handle, iter, func_ctx)?; - write!(self.out, ")")?; + write!(self.out, "))")?; } crate::TypeInner::Array { base, @@ -254,23 +259,23 @@ impl super::Writer<'_, W> { let depth = indent + 1; write!( self.out, - "{}{}{}x{} {}{} = ", + "{}{}{}x{} {}{} = transpose(", INDENT.repeat(indent + 1), crate::ScalarKind::Float.to_hlsl_str(width)?, - columns as u8, rows as u8, + columns as u8, STORE_TEMP_NAME, depth, )?; self.write_store_value(module, &value, func_ctx)?; - writeln!(self.out, ";")?; + writeln!(self.out, ");")?; // then iterate the stores - let row_stride = width as u32 * rows as u32; - for i in 0..columns as u32 { + let row_stride = width as u32 * columns as u32; + for i in 0..rows as u32 { self.temp_access_chain .push(SubAccess::Offset(i * row_stride)); let ty_inner = crate::TypeInner::Vector { - size: rows, + size: columns, kind: crate::ScalarKind::Float, width, }; diff --git a/tests/out/hlsl/access.hlsl b/tests/out/hlsl/access.hlsl index 0946fb8bfc..14201429d4 100644 --- a/tests/out/hlsl/access.hlsl +++ b/tests/out/hlsl/access.hlsl @@ -19,14 +19,14 @@ float4 foo(VertexInput_foo vertexinput_foo) : SV_Position float baz = foo1; foo1 = 1.0; - float4x4 matrix1 = float4x4(asfloat(bar.Load4(0+0)), asfloat(bar.Load4(0+16)), asfloat(bar.Load4(0+32)), asfloat(bar.Load4(0+48))); + float4x4 matrix1 = transpose(float4x4(asfloat(bar.Load4(0+0)), asfloat(bar.Load4(0+16)), asfloat(bar.Load4(0+32)), asfloat(bar.Load4(0+48)))); uint2 arr[2] = {asuint(bar.Load2(4+0)), asuint(bar.Load2(4+8))}; float4 _expr13 = asfloat(bar.Load4(12+0)); float b = _expr13.x; int a = asint(bar.Load((((NagaBufferLengthRW(bar) - 80) / 4) - 2u)*4+8)); bar.Store(8+4+0, asuint(1.0)); { - float4x4 _value2 = float4x4(float4(0.0.xxxx), float4(1.0.xxxx), float4(2.0.xxxx), float4(3.0.xxxx)); + float4x4 _value2 = transpose(float4x4(float4(0.0.xxxx), float4(1.0.xxxx), float4(2.0.xxxx), float4(3.0.xxxx))); bar.Store4(0+0, asuint(_value2[0])); bar.Store4(0+16, asuint(_value2[1])); bar.Store4(0+32, asuint(_value2[2])); diff --git a/tests/out/hlsl/shadow.hlsl b/tests/out/hlsl/shadow.hlsl index 44efa15af7..4982029a1d 100644 --- a/tests/out/hlsl/shadow.hlsl +++ b/tests/out/hlsl/shadow.hlsl @@ -45,7 +45,7 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 break; } uint _expr19 = i; - Light light = {float4x4(asfloat(s_lights.Load4(_expr19*4+0+0+0)), asfloat(s_lights.Load4(_expr19*4+0+0+16)), asfloat(s_lights.Load4(_expr19*4+0+0+32)), asfloat(s_lights.Load4(_expr19*4+0+0+48))), asfloat(s_lights.Load4(_expr19*4+0+64)), asfloat(s_lights.Load4(_expr19*4+0+80))}; + Light light = {transpose(float4x4(asfloat(s_lights.Load4(_expr19*4+0+0+0)), asfloat(s_lights.Load4(_expr19*4+0+0+16)), asfloat(s_lights.Load4(_expr19*4+0+0+32)), asfloat(s_lights.Load4(_expr19*4+0+0+48)))), asfloat(s_lights.Load4(_expr19*4+0+64)), asfloat(s_lights.Load4(_expr19*4+0+80))}; uint _expr22 = i; const float _e25 = fetch_shadow(_expr22, mul(light.proj, fragmentinput_fs_main.position1)); float3 light_dir = normalize((light.pos.xyz - fragmentinput_fs_main.position1.xyz));