amd: use kernel descriptor struct in AMDProgram (#14096)

This commit is contained in:
qazal
2026-01-11 04:25:16 -05:00
committed by GitHub
parent 9973a81356
commit d8aba24967
3 changed files with 298 additions and 13 deletions

View File

@@ -100,6 +100,8 @@ def __getattr__(nm):
"amd_hsa_kernel_code", "hsa_ext_finalize",
"hsa_ext_image", "hsa_ven_amd_aqlprofile"]]],
tarball=rocr_src, args=["-DLITTLEENDIAN_CPU"], prolog=["import os"])
case "amdgpu_kd": return load("amdgpu_kd", None, lambda: [f"{system('llvm-config-20 --includedir')}/llvm/Support/AMDHSAKernelDescriptor.h"],
args=lambda: system("llvm-config-20 --cflags").split() + ["-x", "c++"], recsym=True, parse_macros=False)
case "amd_gpu": return load("amd_gpu", None, [root/f"extra/hip_gpu_driver/{s}.h" for s in ["sdma_registers", "nvd", "gc_11_0_0_offset",
"sienna_cichlid_ip_offset"]],
args=["-I/opt/rocm/include", "-x", "c++"])

View File

@@ -0,0 +1,283 @@
# mypy: ignore-errors
import ctypes
from tinygrad.runtime.support.c import DLL, Struct, CEnum, _IO, _IOW, _IOR, _IOWR
# 8-bit unsigned alias; used in this module as the backing type of the small enums
# and as the element type of the struct's reserved byte arrays.
uint8_t = ctypes.c_ubyte
# Anonymous enum (uint8_t-backed) holding the four FLOAT_ROUND_MODE_* selector values 0..3.
# NOTE(review): presumably these are the encodings written into the 2-bit
# COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_* fields - confirm against the LLVM header.
_anonenum0 = CEnum(uint8_t)
FLOAT_ROUND_MODE_NEAR_EVEN = _anonenum0.define('FLOAT_ROUND_MODE_NEAR_EVEN', 0)
FLOAT_ROUND_MODE_PLUS_INFINITY = _anonenum0.define('FLOAT_ROUND_MODE_PLUS_INFINITY', 1)
FLOAT_ROUND_MODE_MINUS_INFINITY = _anonenum0.define('FLOAT_ROUND_MODE_MINUS_INFINITY', 2)
FLOAT_ROUND_MODE_ZERO = _anonenum0.define('FLOAT_ROUND_MODE_ZERO', 3)
# Anonymous enum (uint8_t-backed) holding the four FLOAT_DENORM_MODE_* selector values 0..3.
# NOTE(review): presumably these are the encodings written into the 2-bit
# COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_* fields - confirm against the LLVM header.
_anonenum1 = CEnum(uint8_t)
FLOAT_DENORM_MODE_FLUSH_SRC_DST = _anonenum1.define('FLOAT_DENORM_MODE_FLUSH_SRC_DST', 0)
FLOAT_DENORM_MODE_FLUSH_DST = _anonenum1.define('FLOAT_DENORM_MODE_FLUSH_DST', 1)
FLOAT_DENORM_MODE_FLUSH_SRC = _anonenum1.define('FLOAT_DENORM_MODE_FLUSH_SRC', 2)
FLOAT_DENORM_MODE_FLUSH_NONE = _anonenum1.define('FLOAT_DENORM_MODE_FLUSH_NONE', 3)
# Anonymous enum (uint8_t-backed) holding the four SYSTEM_VGPR_WORKITEM_ID_* values 0..3.
# NOTE(review): presumably these are the encodings for the 2-bit
# COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID field - confirm against the LLVM header.
_anonenum2 = CEnum(uint8_t)
SYSTEM_VGPR_WORKITEM_ID_X = _anonenum2.define('SYSTEM_VGPR_WORKITEM_ID_X', 0)
SYSTEM_VGPR_WORKITEM_ID_X_Y = _anonenum2.define('SYSTEM_VGPR_WORKITEM_ID_X_Y', 1)
SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = _anonenum2.define('SYSTEM_VGPR_WORKITEM_ID_X_Y_Z', 2)
SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = _anonenum2.define('SYSTEM_VGPR_WORKITEM_ID_UNDEFINED', 3)
# Signed 32-bit alias; backing type of the bitfield-description enums in this module.
int32_t = ctypes.c_int32
# Bitfield layout of the 32-bit COMPUTE_PGM_RSRC1 word.
# Every field NAME contributes three constants:
#   NAME_SHIFT - index of the field's lowest bit
#   NAME_WIDTH - number of bits in the field
#   NAME       - the in-place mask, equal to ((1 << WIDTH) - 1) << SHIFT
# Because the enum is int32_t-backed, masks that include bit 31 are listed as negative
# numbers (two's complement), e.g. 0x80000000 appears as -2147483648. Mask with
# 0xFFFFFFFF before combining them with unsigned register values.
# Several names describe the same bit positions (e.g. bit 21, bit 23, bit 26, bit 29);
# NOTE(review): the GFXn prefixes presumably select the hardware generation the
# interpretation applies to - confirm against the LLVM header.
_anonenum3 = CEnum(int32_t)
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT', 0)
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH', 6)
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT = _anonenum3.define('COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT', 63)
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT', 6)
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH', 4)
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT = _anonenum3.define('COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT', 960)
COMPUTE_PGM_RSRC1_PRIORITY_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_PRIORITY_SHIFT', 10)
COMPUTE_PGM_RSRC1_PRIORITY_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_PRIORITY_WIDTH', 2)
COMPUTE_PGM_RSRC1_PRIORITY = _anonenum3.define('COMPUTE_PGM_RSRC1_PRIORITY', 3072)
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT', 12)
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_WIDTH', 2)
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32 = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32', 12288)
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT', 14)
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_WIDTH', 2)
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64 = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64', 49152)
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT', 16)
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_WIDTH', 2)
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32 = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32', 196608)
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT', 18)
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_WIDTH', 2)
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64 = _anonenum3.define('COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64', 786432)
COMPUTE_PGM_RSRC1_PRIV_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_PRIV_SHIFT', 20)
COMPUTE_PGM_RSRC1_PRIV_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_PRIV_WIDTH', 1)
COMPUTE_PGM_RSRC1_PRIV = _anonenum3.define('COMPUTE_PGM_RSRC1_PRIV', 1048576)
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT', 21)
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP', 2097152)
COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT', 21)
COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN', 2097152)
COMPUTE_PGM_RSRC1_DEBUG_MODE_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_DEBUG_MODE_SHIFT', 22)
COMPUTE_PGM_RSRC1_DEBUG_MODE_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_DEBUG_MODE_WIDTH', 1)
COMPUTE_PGM_RSRC1_DEBUG_MODE = _anonenum3.define('COMPUTE_PGM_RSRC1_DEBUG_MODE', 4194304)
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT', 23)
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE', 8388608)
COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF_SHIFT', 23)
COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF', 8388608)
COMPUTE_PGM_RSRC1_BULKY_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_BULKY_SHIFT', 24)
COMPUTE_PGM_RSRC1_BULKY_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_BULKY_WIDTH', 1)
COMPUTE_PGM_RSRC1_BULKY = _anonenum3.define('COMPUTE_PGM_RSRC1_BULKY', 16777216)
COMPUTE_PGM_RSRC1_CDBG_USER_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_CDBG_USER_SHIFT', 25)
COMPUTE_PGM_RSRC1_CDBG_USER_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_CDBG_USER_WIDTH', 1)
COMPUTE_PGM_RSRC1_CDBG_USER = _anonenum3.define('COMPUTE_PGM_RSRC1_CDBG_USER', 33554432)
COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0_SHIFT', 26)
COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0 = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0', 67108864)
COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT', 26)
COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL', 67108864)
COMPUTE_PGM_RSRC1_RESERVED1_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_RESERVED1_SHIFT', 27)
COMPUTE_PGM_RSRC1_RESERVED1_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_RESERVED1_WIDTH', 2)
COMPUTE_PGM_RSRC1_RESERVED1 = _anonenum3.define('COMPUTE_PGM_RSRC1_RESERVED1', 402653184)
COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2_SHIFT', 29)
COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2_WIDTH', 3)
# 0xE0000000 (bits 29..31) reinterpreted as a signed 32-bit value.
COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2 = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2', -536870912)
COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT', 29)
COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE', 536870912)
COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT', 30)
COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_WIDTH', 1)
COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED', 1073741824)
COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT', 31)
COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_WIDTH = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_WIDTH', 1)
# 0x80000000 (bit 31) reinterpreted as a signed 32-bit value.
COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS = _anonenum3.define('COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS', -2147483648)
# Bitfield layout of the 32-bit COMPUTE_PGM_RSRC2 word.
# Every field NAME contributes NAME_SHIFT (lowest bit index), NAME_WIDTH (bit count) and
# NAME (the in-place mask, ((1 << WIDTH) - 1) << SHIFT). The enum is int32_t-backed, so
# the bit-31 mask is listed as a negative number.
# Bit 6 is described twice (GFX6_GFX11_ENABLE_TRAP_HANDLER and GFX12_PLUS_RESERVED1).
_anonenum4 = CEnum(int32_t)
COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT', 0)
COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT', 1)
COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT', 1)
COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH', 5)
COMPUTE_PGM_RSRC2_USER_SGPR_COUNT = _anonenum4.define('COMPUTE_PGM_RSRC2_USER_SGPR_COUNT', 62)
COMPUTE_PGM_RSRC2_GFX6_GFX11_ENABLE_TRAP_HANDLER_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_GFX6_GFX11_ENABLE_TRAP_HANDLER_SHIFT', 6)
COMPUTE_PGM_RSRC2_GFX6_GFX11_ENABLE_TRAP_HANDLER_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_GFX6_GFX11_ENABLE_TRAP_HANDLER_WIDTH', 1)
COMPUTE_PGM_RSRC2_GFX6_GFX11_ENABLE_TRAP_HANDLER = _anonenum4.define('COMPUTE_PGM_RSRC2_GFX6_GFX11_ENABLE_TRAP_HANDLER', 64)
COMPUTE_PGM_RSRC2_GFX12_PLUS_RESERVED1_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_GFX12_PLUS_RESERVED1_SHIFT', 6)
COMPUTE_PGM_RSRC2_GFX12_PLUS_RESERVED1_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_GFX12_PLUS_RESERVED1_WIDTH', 1)
COMPUTE_PGM_RSRC2_GFX12_PLUS_RESERVED1 = _anonenum4.define('COMPUTE_PGM_RSRC2_GFX12_PLUS_RESERVED1', 64)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT', 7)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X', 128)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT', 8)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y', 256)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT', 9)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z', 512)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT', 10)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO', 1024)
COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT', 11)
COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_WIDTH', 2)
COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID', 6144)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH_SHIFT', 13)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH', 8192)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY_SHIFT', 14)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY', 16384)
COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE_SHIFT', 15)
COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE_WIDTH', 9)
# 0x1FF << 15: nine bits starting at bit 15.
COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE = _anonenum4.define('COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE', 16744448)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT', 24)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION', 16777216)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT', 25)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE', 33554432)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT', 26)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO', 67108864)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT', 27)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW', 134217728)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT', 28)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW', 268435456)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT', 29)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT', 536870912)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT', 30)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_WIDTH', 1)
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO = _anonenum4.define('COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO', 1073741824)
COMPUTE_PGM_RSRC2_RESERVED0_SHIFT = _anonenum4.define('COMPUTE_PGM_RSRC2_RESERVED0_SHIFT', 31)
COMPUTE_PGM_RSRC2_RESERVED0_WIDTH = _anonenum4.define('COMPUTE_PGM_RSRC2_RESERVED0_WIDTH', 1)
# 0x80000000 (bit 31) reinterpreted as a signed 32-bit value.
COMPUTE_PGM_RSRC2_RESERVED0 = _anonenum4.define('COMPUTE_PGM_RSRC2_RESERVED0', -2147483648)
# Bitfield layout of the 32-bit COMPUTE_PGM_RSRC3 word, GFX90A-named variant.
# Every field NAME contributes NAME_SHIFT (lowest bit index), NAME_WIDTH (bit count) and
# NAME (the in-place mask, ((1 << WIDTH) - 1) << SHIFT). The four fields together cover
# all 32 bits (6 + 10 + 1 + 15).
_anonenum5 = CEnum(int32_t)
COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT', 0)
COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_WIDTH = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_WIDTH', 6)
COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET', 63)
COMPUTE_PGM_RSRC3_GFX90A_RESERVED0_SHIFT = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_RESERVED0_SHIFT', 6)
COMPUTE_PGM_RSRC3_GFX90A_RESERVED0_WIDTH = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_RESERVED0_WIDTH', 10)
COMPUTE_PGM_RSRC3_GFX90A_RESERVED0 = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_RESERVED0', 65472)
COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT', 16)
COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_WIDTH = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_WIDTH', 1)
COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT', 65536)
COMPUTE_PGM_RSRC3_GFX90A_RESERVED1_SHIFT = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_RESERVED1_SHIFT', 17)
COMPUTE_PGM_RSRC3_GFX90A_RESERVED1_WIDTH = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_RESERVED1_WIDTH', 15)
# 0xFFFE0000 (bits 17..31) reinterpreted as a signed 32-bit value.
COMPUTE_PGM_RSRC3_GFX90A_RESERVED1 = _anonenum5.define('COMPUTE_PGM_RSRC3_GFX90A_RESERVED1', -131072)
# Bitfield layout of the 32-bit COMPUTE_PGM_RSRC3 word, GFX10/GFX11/GFX12-named variants.
# Every field NAME contributes NAME_SHIFT (lowest bit index), NAME_WIDTH (bit count) and
# NAME (the in-place mask, ((1 << WIDTH) - 1) << SHIFT). The enum is int32_t-backed, so
# bit-31 masks are listed as negative numbers.
# The same bit ranges are described under several names (e.g. bits 4.. have a GFX10
# reserved field, a 6-bit GFX11 INST_PREF_SIZE and an 8-bit GFX12_PLUS INST_PREF_SIZE).
# NOTE(review): the GFXn prefix presumably selects which interpretation applies to a
# given target - confirm against the LLVM header before picking a mask.
_anonenum6 = CEnum(int32_t)
COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT', 0)
COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_WIDTH', 4)
COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT', 15)
COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0_SHIFT', 0)
COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0_WIDTH', 4)
COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0 = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0', 15)
COMPUTE_PGM_RSRC3_GFX10_RESERVED1_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_RESERVED1_SHIFT', 4)
COMPUTE_PGM_RSRC3_GFX10_RESERVED1_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_RESERVED1_WIDTH', 8)
COMPUTE_PGM_RSRC3_GFX10_RESERVED1 = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_RESERVED1', 4080)
COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT', 4)
COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH', 6)
COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE', 1008)
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START_SHIFT', 10)
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START_WIDTH', 1)
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START', 1024)
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END_SHIFT', 11)
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END_WIDTH', 1)
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END', 2048)
COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT', 4)
COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH', 8)
COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE', 4080)
COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2_SHIFT', 12)
COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2_WIDTH', 1)
COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2 = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2', 4096)
COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3_SHIFT', 13)
COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3_WIDTH', 1)
COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3 = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3', 8192)
COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN_SHIFT', 13)
COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN_WIDTH', 1)
COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN', 8192)
COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4_SHIFT', 14)
COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4_WIDTH', 17)
# 0x7FFFC000: bits 14..30.
COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4 = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4', 2147467264)
COMPUTE_PGM_RSRC3_GFX10_RESERVED5_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_RESERVED5_SHIFT', 31)
COMPUTE_PGM_RSRC3_GFX10_RESERVED5_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_RESERVED5_WIDTH', 1)
# 0x80000000 (bit 31) reinterpreted as a signed 32-bit value.
COMPUTE_PGM_RSRC3_GFX10_RESERVED5 = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX10_RESERVED5', -2147483648)
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP_SHIFT = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP_SHIFT', 31)
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP_WIDTH = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP_WIDTH', 1)
# 0x80000000 (bit 31) reinterpreted as a signed 32-bit value.
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP = _anonenum6.define('COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP', -2147483648)
# Bitfield layout of the KERNEL_CODE_PROPERTY_* flags; the fields span bits 0..15.
# Every field NAME contributes NAME_SHIFT (lowest bit index), NAME_WIDTH (bit count) and
# NAME (the in-place mask, ((1 << WIDTH) - 1) << SHIFT).
# NOTE(review): presumably these describe the 16-bit `kernel_code_properties` member of
# llvm_amdhsa_kernel_descriptor_t - confirm against the LLVM header.
_anonenum7 = CEnum(int32_t)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT', 0)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR', 2)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT', 2)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR', 4)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT', 3)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR', 8)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT', 4)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID', 16)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT', 5)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT', 32)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT', 6)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH', 1)
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE', 64)
KERNEL_CODE_PROPERTY_RESERVED0_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_RESERVED0_SHIFT', 7)
KERNEL_CODE_PROPERTY_RESERVED0_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_RESERVED0_WIDTH', 3)
KERNEL_CODE_PROPERTY_RESERVED0 = _anonenum7.define('KERNEL_CODE_PROPERTY_RESERVED0', 896)
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT', 10)
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH', 1)
# 1024 == 0x400 (bit 10).
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = _anonenum7.define('KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32', 1024)
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT', 11)
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_WIDTH', 1)
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK = _anonenum7.define('KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK', 2048)
KERNEL_CODE_PROPERTY_RESERVED1_SHIFT = _anonenum7.define('KERNEL_CODE_PROPERTY_RESERVED1_SHIFT', 12)
KERNEL_CODE_PROPERTY_RESERVED1_WIDTH = _anonenum7.define('KERNEL_CODE_PROPERTY_RESERVED1_WIDTH', 4)
KERNEL_CODE_PROPERTY_RESERVED1 = _anonenum7.define('KERNEL_CODE_PROPERTY_RESERVED1', 61440)
# Bitfield layout of the KERNARG_PRELOAD_SPEC_* fields: a 7-bit LENGTH at bits 0..6 and a
# 9-bit OFFSET at bits 7..15 (16 bits in total). Each field has a _SHIFT, a _WIDTH and an
# unsuffixed in-place mask equal to ((1 << WIDTH) - 1) << SHIFT.
# NOTE(review): presumably these describe the 16-bit `kernarg_preload` member of
# llvm_amdhsa_kernel_descriptor_t - confirm against the LLVM header.
_anonenum8 = CEnum(int32_t)
KERNARG_PRELOAD_SPEC_LENGTH_SHIFT = _anonenum8.define('KERNARG_PRELOAD_SPEC_LENGTH_SHIFT', 0)
KERNARG_PRELOAD_SPEC_LENGTH_WIDTH = _anonenum8.define('KERNARG_PRELOAD_SPEC_LENGTH_WIDTH', 7)
KERNARG_PRELOAD_SPEC_LENGTH = _anonenum8.define('KERNARG_PRELOAD_SPEC_LENGTH', 127)
KERNARG_PRELOAD_SPEC_OFFSET_SHIFT = _anonenum8.define('KERNARG_PRELOAD_SPEC_OFFSET_SHIFT', 7)
KERNARG_PRELOAD_SPEC_OFFSET_WIDTH = _anonenum8.define('KERNARG_PRELOAD_SPEC_OFFSET_WIDTH', 9)
KERNARG_PRELOAD_SPEC_OFFSET = _anonenum8.define('KERNARG_PRELOAD_SPEC_OFFSET', 65408)
# Record type for the AMD HSA kernel descriptor. The class is declared empty first and its
# _fields_ are attached afterwards, once the integer aliases below exist.
# The declared field sizes sum to 64 bytes (4+4+4+4+8+20+4+4+4+2+2+4).
# NOTE(review): the actual in-memory layout is decided by the project's `Struct` base -
# it is expected to match the *_OFFSET constants defined in this module; confirm.
class llvm_amdhsa_kernel_descriptor_t(Struct): pass
# Fixed-width integer aliases used by the field list.
uint32_t = ctypes.c_uint32
int64_t = ctypes.c_int64
uint16_t = ctypes.c_uint16
llvm_amdhsa_kernel_descriptor_t._fields_ = [
('group_segment_fixed_size', uint32_t),
('private_segment_fixed_size', uint32_t),
('kernarg_size', uint32_t),
('reserved0', (uint8_t * 4)),
# Signed 64-bit value, so the offset it holds may be negative.
('kernel_code_entry_byte_offset', int64_t),
('reserved1', (uint8_t * 20)),
# Note the declaration order: rsrc3 precedes rsrc1 and rsrc2.
('compute_pgm_rsrc3', uint32_t),
('compute_pgm_rsrc1', uint32_t),
('compute_pgm_rsrc2', uint32_t),
('kernel_code_properties', uint16_t),
('kernarg_preload', uint16_t),
('reserved3', (uint8_t * 4)),
]
# Byte offsets, one constant per field name of llvm_amdhsa_kernel_descriptor_t
# (upper-cased field name + _OFFSET). The values equal the running sum of the declared
# field sizes, i.e. a layout with no padding; the last field starts at 60 and is 4 bytes
# long, giving a 64-byte record.
_anonenum9 = CEnum(uint32_t)
GROUP_SEGMENT_FIXED_SIZE_OFFSET = _anonenum9.define('GROUP_SEGMENT_FIXED_SIZE_OFFSET', 0)
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = _anonenum9.define('PRIVATE_SEGMENT_FIXED_SIZE_OFFSET', 4)
KERNARG_SIZE_OFFSET = _anonenum9.define('KERNARG_SIZE_OFFSET', 8)
RESERVED0_OFFSET = _anonenum9.define('RESERVED0_OFFSET', 12)
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = _anonenum9.define('KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET', 16)
RESERVED1_OFFSET = _anonenum9.define('RESERVED1_OFFSET', 24)
COMPUTE_PGM_RSRC3_OFFSET = _anonenum9.define('COMPUTE_PGM_RSRC3_OFFSET', 44)
COMPUTE_PGM_RSRC1_OFFSET = _anonenum9.define('COMPUTE_PGM_RSRC1_OFFSET', 48)
COMPUTE_PGM_RSRC2_OFFSET = _anonenum9.define('COMPUTE_PGM_RSRC2_OFFSET', 52)
KERNEL_CODE_PROPERTIES_OFFSET = _anonenum9.define('KERNEL_CODE_PROPERTIES_OFFSET', 56)
KERNARG_PRELOAD_OFFSET = _anonenum9.define('KERNARG_PRELOAD_OFFSET', 58)
RESERVED3_OFFSET = _anonenum9.define('RESERVED3_OFFSET', 60)

View File

@@ -11,7 +11,7 @@ from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, Profil
from tinygrad.helpers import VIZ, AMD_CC, AMD_LLVM, ceildiv
from tinygrad.renderer.cstyle import AMDHIPRenderer, AMDHIPCCRenderer
from tinygrad.renderer.llvmir import AMDLLVMRenderer
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd
from tinygrad.runtime.autogen.am import am
from tinygrad.runtime.support.elf import elf_loader
from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager
@@ -557,29 +557,29 @@ class AMDProgram(HCQProgram):
self.dev.allocator._copyin(self.lib_gpu, image)
self.dev.synchronize()
self.group_segment_size = image[rodata_entry:rodata_entry+4].cast("I")[0]
self.private_segment_size = image[rodata_entry+4:rodata_entry+8].cast("I")[0]
self.kernargs_segment_size = image[rodata_entry+8:rodata_entry+12].cast("I")[0]
desc_sz = ctypes.sizeof(amdgpu_kd.llvm_amdhsa_kernel_descriptor_t)
desc = amdgpu_kd.llvm_amdhsa_kernel_descriptor_t.from_buffer_copy(bytes(image[rodata_entry:rodata_entry+desc_sz]))
self.group_segment_size = desc.group_segment_fixed_size
self.private_segment_size = desc.private_segment_fixed_size
self.kernargs_segment_size = desc.kernarg_size
lds_size = ((self.group_segment_size + 511) // 512) & 0x1FF
if lds_size > (self.dev.iface.props['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requested: group_segment_size")
# Ensure scratch size
self.dev._ensure_has_local_memory(self.private_segment_size)
# NOTE: this is wrong, it's not this object. pad it, since it might be smaller than the struct
code = hsa.amd_kernel_code_t.from_buffer_copy(bytes(image[rodata_entry:rodata_entry+256]) + b'\x00'*256)
self.wave32: bool = code.kernel_code_properties & 0x400 == 0x400
self.wave32: bool = desc.kernel_code_properties & 0x400 == 0x400
# Set rsrc1.priv=1 on gfx11 to workaround cwsr.
self.rsrc1: int = code.compute_pgm_rsrc1 | ((1 << 20) if (11,0,0) <= self.dev.target < (12,0,0) else 0)
self.rsrc2: int = code.compute_pgm_rsrc2 | (lds_size << 15)
self.rsrc3: int = image[rodata_entry+44:rodata_entry+48].cast("I")[0] # NOTE: kernel descriptor, not in amd_kernel_code_t struct
self.rsrc1: int = desc.compute_pgm_rsrc1 | ((1 << 20) if (11,0,0) <= self.dev.target < (12,0,0) else 0)
self.rsrc2: int = desc.compute_pgm_rsrc2 | (lds_size << 15)
self.rsrc3: int = desc.compute_pgm_rsrc3
self.aql_prog_addr: int = self.lib_gpu.va_addr + rodata_entry
self.prog_addr: int = self.lib_gpu.va_addr + rodata_entry + code.kernel_code_entry_byte_offset
self.prog_addr: int = self.lib_gpu.va_addr + rodata_entry + desc.kernel_code_entry_byte_offset
# Some programs use hsa_kernel_dispatch_packet_t to read workgroup sizes during execution.
# The packet is represented as a pointer and set up in SGPRs. Space for the packet is allocated as part of the kernel arguments.
self.enable_dispatch_ptr: int = code.kernel_code_properties & hsa.AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR
self.enable_private_segment_sgpr: int = code.kernel_code_properties & hsa.AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
self.enable_dispatch_ptr: int = desc.kernel_code_properties & hsa.AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR
self.enable_private_segment_sgpr: int = desc.kernel_code_properties & hsa.AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
additional_alloc_sz = ctypes.sizeof(hsa.hsa_kernel_dispatch_packet_t) if self.enable_dispatch_ptr else 0
if dev.sqtt_enabled: self.libhash: tuple[int, int] = struct.unpack('<Q', hashlib.md5(self.lib).digest()[:8])*2