From 6a5430ab00cc0eb467de382db2d92d31d16a68ab Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Thu, 1 Jan 2026 23:01:46 +0900 Subject: [PATCH] correct args order in mi350x gemm (#13949) --- extra/gemm/asm/gemm.s | 3 ++- extra/gemm/asm/test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/extra/gemm/asm/gemm.s b/extra/gemm/asm/gemm.s index 1f1bb22611..dcb93630a6 100644 --- a/extra/gemm/asm/gemm.s +++ b/extra/gemm/asm/gemm.s @@ -1,6 +1,7 @@ // ** global buffers s_load_dwordx2 s[28:29], s[0:1], 0x0 // C - s_load_dwordx4 s[32:35], s[0:1], 0x8 // A, B + s_load_dwordx2 s[34:35], s[0:1], 0x08 // A + s_load_dwordx2 s[32:33], s[0:1], 0x10 // B // ** others kernel args s_load_dword s24, s[0:1], 0x18 // N s_load_dword s54, s[0:1], 0x1C // num work groups diff --git a/extra/gemm/asm/test.py b/extra/gemm/asm/test.py index 3b7dc3196e..d19f911a52 100644 --- a/extra/gemm/asm/test.py +++ b/extra/gemm/asm/test.py @@ -52,7 +52,7 @@ def get_asm_prg() -> ProgramSpec: lib = Device[Device.DEFAULT].compiler.compile(src) return ProgramSpec("gemm", src, Device.DEFAULT, ast, lib=lib, global_size=[NUM_WG, 1, 1], local_size=[THREADS_PER_WG, 1, 1], globals=[0, 1, 2], vars=[UOp.variable("SZ", 256, 8192), UOp.variable("NUM_WG", 1, 1024)]) -eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(B).uop.buffer, from_torch(A).uop.buffer], fixedvars={"SZ":N, "NUM_WG":NUM_WG}, +eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(A).uop.buffer, from_torch(B).uop.buffer], fixedvars={"SZ":N, "NUM_WG":NUM_WG}, prg=CompiledRunner(get_asm_prg()))) with Context(DEBUG=2):