From c0f52c9dcb7fb512d209811afbb23fa8b06ad386 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Fri, 2 Jan 2026 00:10:22 +0900 Subject: [PATCH] split assembly gemm to per arch directory (#13953) --- extra/gemm/asm/{ => cdna}/gemm.s | 0 extra/gemm/asm/{ => cdna}/template.s | 0 extra/gemm/asm/{ => cdna}/test.py | 0 extra/gemm/asm/unpack_kd.py | 10 +++++----- 4 files changed, 5 insertions(+), 5 deletions(-) rename extra/gemm/asm/{ => cdna}/gemm.s (100%) rename extra/gemm/asm/{ => cdna}/template.s (100%) rename extra/gemm/asm/{ => cdna}/test.py (100%) diff --git a/extra/gemm/asm/gemm.s b/extra/gemm/asm/cdna/gemm.s similarity index 100% rename from extra/gemm/asm/gemm.s rename to extra/gemm/asm/cdna/gemm.s diff --git a/extra/gemm/asm/template.s b/extra/gemm/asm/cdna/template.s similarity index 100% rename from extra/gemm/asm/template.s rename to extra/gemm/asm/cdna/template.s diff --git a/extra/gemm/asm/test.py b/extra/gemm/asm/cdna/test.py similarity index 100% rename from extra/gemm/asm/test.py rename to extra/gemm/asm/cdna/test.py diff --git a/extra/gemm/asm/unpack_kd.py b/extra/gemm/asm/unpack_kd.py index 150e4c195b..a1447220ff 100644 --- a/extra/gemm/asm/unpack_kd.py +++ b/extra/gemm/asm/unpack_kd.py @@ -1,12 +1,12 @@ -# unpack the complete kernel descriptor of an amdgpu ELF of for gfx950 +# unpack the complete kernel descriptor of an amdgpu ELF # https://rocm.docs.amd.com/projects/llvm-project/en/latest/LLVM/llvm/html/AMDGPUUsage.html#code-object-v3-kernel-descriptor -import struct, pathlib +import struct, pathlib, sys from tinygrad.runtime.support.elf import elf_loader def bits(x, lo, hi): return (x >> lo) & ((1 << (hi - lo + 1)) - 1) def assert_zero(x, lo, hi): assert bits(x, lo, hi) == 0 -with open(fp:=pathlib.Path(__file__).parent/"lib", "rb") as f: +with open(sys.argv[1], "rb") as f: lib = f.read() image, sections, relocs = elf_loader(lib) @@ -49,7 +49,7 @@ print("COMPUTE_PGM_RSRC3: 0x%08x" % pgm_rsrc3) print("COMPUTE_PGM_RSRC1: 0x%08x" % pgm_rsrc1) print("COMPUTE_PGM_RSRC2: 0x%08x" % pgm_rsrc2) -# rsrc 3 +# rsrc 3 (gfx950) accum_offset_raw = bits(pgm_rsrc3, 0, 5) assert_zero(pgm_rsrc3, 6, 15) @@ -169,10 +169,10 @@ assert_zero(desc, 458, 459) uses_dynamic_stack = bits(desc, 459, 460) print("DESC.USES_DYNAMIC_STACK:", uses_dynamic_stack) +# gfx950 only assert_zero(desc, 460, 463) kernarg_preload_spec_length = bits(desc, 464, 470) print("DESC.KERNARG_PRELOAD_SPEC_LENGTH:", kernarg_preload_spec_length) - kernarg_preload_spec_offset = bits(desc, 471, 479) print("DESC.KERNARG_PRELOAD_SPEC_OFFSET:", kernarg_preload_spec_offset)