hlsl: transpose storage matrices

2026-04-22 03:02:01 -04:00 · 2021-07-26 00:36:03 -04:00
parent e7502d0a9b
commit a5c2cef9ab
3 changed files with 20 additions and 15 deletions
--- a/src/back/hlsl/storage.rs
+++ b/src/back/hlsl/storage.rs
@@ -1,3 +1,8 @@
+//! Logic related to `ByteAddressBuffer` operations.
+//!
+//! The HLSL backend uses byte address buffers for all storage buffers in the IR.
+//! Matrices have to be transposed, because HLSL syntax implies row-major ordering.
+
 use super::{
    super::{FunctionCtx, INDENT},
    BackendResult, Error,
@@ -116,22 +121,22 @@ impl<W: fmt::Write> super::Writer<'_, W> {
            } => {
                write!(
                    self.out,
-                    "{}{}x{}(",
+                    "transpose({}{}x{}(",
                    crate::ScalarKind::Float.to_hlsl_str(width)?,
-                    columns as u8,
                    rows as u8,
+                    columns as u8,
                )?;
-                let row_stride = width as u32 * rows as u32;
-                let iter = (0..columns as u32).map(|i| {
+                let row_stride = width as u32 * columns as u32;
+                let iter = (0..rows as u32).map(|i| {
                    let ty_inner = crate::TypeInner::Vector {
-                        size: rows,
+                        size: columns,
                        kind: crate::ScalarKind::Float,
                        width,
                    };
                    (TypeResolution::Value(ty_inner), i * row_stride)
                });
                self.write_storage_load_sequence(module, var_handle, iter, func_ctx)?;
-                write!(self.out, ")")?;
+                write!(self.out, "))")?;
            }
            crate::TypeInner::Array {
                base,
@@ -254,23 +259,23 @@ impl<W: fmt::Write> super::Writer<'_, W> {
                let depth = indent + 1;
                write!(
                    self.out,
-                    "{}{}{}x{} {}{} = ",
+                    "{}{}{}x{} {}{} = transpose(",
                    INDENT.repeat(indent + 1),
                    crate::ScalarKind::Float.to_hlsl_str(width)?,
-                    columns as u8,
                    rows as u8,
+                    columns as u8,
                    STORE_TEMP_NAME,
                    depth,
                )?;
                self.write_store_value(module, &value, func_ctx)?;
-                writeln!(self.out, ";")?;
+                writeln!(self.out, ");")?;
                // then iterate the stores
-                let row_stride = width as u32 * rows as u32;
-                for i in 0..columns as u32 {
+                let row_stride = width as u32 * columns as u32;
+                for i in 0..rows as u32 {
                    self.temp_access_chain
                        .push(SubAccess::Offset(i * row_stride));
                    let ty_inner = crate::TypeInner::Vector {
-                        size: rows,
+                        size: columns,
                        kind: crate::ScalarKind::Float,
                        width,
                    };
--- a/tests/out/hlsl/access.hlsl
+++ b/tests/out/hlsl/access.hlsl
@@ -19,14 +19,14 @@ float4 foo(VertexInput_foo vertexinput_foo) : SV_Position

    float baz = foo1;
    foo1 = 1.0;
-    float4x4 matrix1 = float4x4(asfloat(bar.Load4(0+0)), asfloat(bar.Load4(0+16)), asfloat(bar.Load4(0+32)), asfloat(bar.Load4(0+48)));
+    float4x4 matrix1 = transpose(float4x4(asfloat(bar.Load4(0+0)), asfloat(bar.Load4(0+16)), asfloat(bar.Load4(0+32)), asfloat(bar.Load4(0+48))));
    uint2 arr[2] = {asuint(bar.Load2(4+0)), asuint(bar.Load2(4+8))};
    float4 _expr13 = asfloat(bar.Load4(12+0));
    float b = _expr13.x;
    int a = asint(bar.Load((((NagaBufferLengthRW(bar) - 80) / 4) - 2u)*4+8));
    bar.Store(8+4+0, asuint(1.0));
    {
-        float4x4 _value2 = float4x4(float4(0.0.xxxx), float4(1.0.xxxx), float4(2.0.xxxx), float4(3.0.xxxx));
+        float4x4 _value2 = transpose(float4x4(float4(0.0.xxxx), float4(1.0.xxxx), float4(2.0.xxxx), float4(3.0.xxxx)));
        bar.Store4(0+0, asuint(_value2[0]));
        bar.Store4(0+16, asuint(_value2[1]));
        bar.Store4(0+32, asuint(_value2[2]));
--- a/tests/out/hlsl/shadow.hlsl
+++ b/tests/out/hlsl/shadow.hlsl
@@ -45,7 +45,7 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0
            break;
        }
        uint _expr19 = i;
-        Light light = {float4x4(asfloat(s_lights.Load4(_expr19*4+0+0+0)), asfloat(s_lights.Load4(_expr19*4+0+0+16)), asfloat(s_lights.Load4(_expr19*4+0+0+32)), asfloat(s_lights.Load4(_expr19*4+0+0+48))), asfloat(s_lights.Load4(_expr19*4+0+64)), asfloat(s_lights.Load4(_expr19*4+0+80))};
+        Light light = {transpose(float4x4(asfloat(s_lights.Load4(_expr19*4+0+0+0)), asfloat(s_lights.Load4(_expr19*4+0+0+16)), asfloat(s_lights.Load4(_expr19*4+0+0+32)), asfloat(s_lights.Load4(_expr19*4+0+0+48)))), asfloat(s_lights.Load4(_expr19*4+0+64)), asfloat(s_lights.Load4(_expr19*4+0+80))};
        uint _expr22 = i;
        const float _e25 = fetch_shadow(_expr22, mul(light.proj, fragmentinput_fs_main.position1));
        float3 light_dir = normalize((light.pos.xyz - fragmentinput_fs_main.position1.xyz));