Files
tinygrad/extra/gemm/asm/cdna/template.s
qazal 2cc64d71b0 simplify mi350x gemm / viz asm tests (#13984)
* mi350x gemm cleanup

* asm tests work

* simpler asm tests
2026-01-03 11:11:07 +09:00

79 lines
1.8 KiB
ArmAsm

.text
.section .text.
.global gemm
.p2align 8
.type gemm,@function
gemm:
INSTRUCTIONS
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel gemm
# basic memory requirements
.amdhsa_group_segment_fixed_size 133120
.amdhsa_private_segment_fixed_size 0
.amdhsa_kernarg_size 28
# register usage (RSRC1)
.amdhsa_next_free_vgpr 504
.amdhsa_next_free_sgpr 96
# workgroup / workitem IDs (RSRC2)
.amdhsa_system_sgpr_workgroup_id_x 1
.amdhsa_system_sgpr_workgroup_id_y 1
.amdhsa_system_sgpr_workgroup_id_z 1
# user SGPRs, we only specify the kernel args ptr in s[0:1]
.amdhsa_user_sgpr_kernarg_segment_ptr 1
.amdhsa_user_sgpr_count 2
.amdhsa_user_sgpr_kernarg_preload_length 0
.amdhsa_user_sgpr_kernarg_preload_offset 0
# gfx90a / gfx940 specifics (RSRC3)
.amdhsa_accum_offset 248
.amdhsa_uses_dynamic_stack 0
.amdhsa_tg_split 0
.end_amdhsa_kernel
.amdgpu_metadata
---
amdhsa.kernels:
- .name: gemm
.symbol: gemm.kd
.args:
- .name: C
.address_space: global
.offset: 0
.size: 8
.value_kind: global_buffer
.value_type: bf16
- .name: B
.address_space: global
.offset: 8
.size: 8
.value_kind: global_buffer
.value_type: bf16
- .name: A
.address_space: global
.offset: 16
.size: 8
.value_kind: global_buffer
.value_type: bf16
- .name: sz
.offset: 24
.size: 4
.value_kind: by_value
.value_type: u32
.group_segment_fixed_size: 133120
.private_segment_fixed_size: 0
.kernarg_segment_align: 8
.kernarg_segment_size: 28
.max_flat_workgroup_size: 256
.sgpr_count: 88
.sgpr_spill_count: 0
.vgpr_count: 248
.vgpr_spill_count: 0
.wavefront_size: 64
amdhsa.version:
- 1
- 0
...
.end_amdgpu_metadata