diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a5eb50fc1..9d2ec38ae1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -112,8 +112,8 @@ jobs: python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" - name: Test DEBUG run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())" - - name: Repo line count <6500 lines - run: MAX_LINE_COUNT=6500 python sz.py + - name: Repo line count <7000 lines + run: MAX_LINE_COUNT=7000 python sz.py testcpuimagenet: name: ImageNet to C Tests diff --git a/autogen_stubs.sh b/autogen_stubs.sh index fcb5ca1a19..8f7341f3d8 100755 --- a/autogen_stubs.sh +++ b/autogen_stubs.sh @@ -56,6 +56,13 @@ generate_comgr() { python3 -c "import tinygrad.runtime.autogen.comgr" } +generate_kfd() { + clang2py /usr/include/linux/kfd_ioctl.h -o $BASE/kfd.py -k cdefstum + fixup $BASE/kfd.py + sed -i "s\import ctypes\import ctypes, os\g" $BASE/kfd.py + python3 -c "import tinygrad.runtime.autogen.kfd" +} + generate_cuda() { clang2py /usr/include/cuda.h /usr/include/nvrtc.h -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so -l /usr/lib/x86_64-linux-gnu/libnvrtc.so sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py @@ -69,10 +76,19 @@ generate_hsa() { clang2py \ /opt/rocm/include/hsa/hsa.h \ /opt/rocm/include/hsa/hsa_ext_amd.h \ + /opt/rocm/include/hsa/amd_hsa_signal.h \ + /opt/rocm/include/hsa/amd_hsa_queue.h \ /opt/rocm/include/hsa/hsa_ext_finalize.h /opt/rocm/include/hsa/hsa_ext_image.h \ --clang-args="-I/opt/rocm/include" \ -o $BASE/hsa.py -l /opt/rocm/lib/libhsa-runtime64.so + + # clang2py broken when pass -x c++ to prev headers + clang2py extra/hip_gpu_driver/sdma_registers.h \ + --clang-args="-I/opt/rocm/include -x c++" \ + -o $BASE/amd_sdma.py -l /opt/rocm/lib/libhsa-runtime64.so + fixup $BASE/hsa.py + fixup $BASE/amd_sdma.py sed -i "s\import ctypes\import ctypes, os\g" $BASE/hsa.py sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/hsa.py python3 -c "import tinygrad.runtime.autogen.hsa" @@ -83,6 +99,7 @@ elif [ "$1" == "hip" ]; then generate_hip elif [ "$1" == "comgr" ]; then generate_comgr elif [ "$1" == "cuda" ]; then generate_cuda elif [ "$1" == "hsa" ]; then generate_hsa -elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_hsa +elif [ "$1" == "kfd" ]; then generate_kfd +elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_hsa; generate_kfd else echo "usage: $0 " fi diff --git a/extra/hip_gpu_driver/hip_ioctl.py b/extra/hip_gpu_driver/hip_ioctl.py index 2611a1ac5c..f7ec947f67 100644 --- a/extra/hip_gpu_driver/hip_ioctl.py +++ b/extra/hip_gpu_driver/hip_ioctl.py @@ -44,10 +44,9 @@ def install_hook(c_function, python_function): # *** ioctl lib end *** -# clang2py kfd_ioctl.h -o kfd_ioctl.py -from extra.hip_gpu_driver import kfd_ioctl +import tinygrad.runtime.autogen.kfd as kfd_ioctl def ioctls_from_header(): - hdr = (pathlib.Path(__file__).parent.parent.parent / "extra/hip_gpu_driver/kfd_ioctl.h").read_text().replace("\\\n", "") + hdr = pathlib.Path("/usr/include/linux/kfd_ioctl.h").read_text().replace("\\\n", "") pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_IOW?R?\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)' matches = re.findall(pattern, hdr, re.MULTILINE) return {int(nr, 0x10):(name, getattr(kfd_ioctl, "struct_"+sname)) for name, nr, sname in matches} @@ -68,7 +67,8 @@ def ioctl(fd, request, argp): out = ctypes.cast(s.attrs, ctypes.POINTER(kfd_ioctl.struct_kfd_ioctl_svm_attribute)) for i in range(s.nattr): print(f"{i}: {kfd_ioctl.kfd_ioctl_svm_attr_type__enumvalues[out[i].type]:40s}: {out[i].value:#x}") else: - print("ioctl", f"{idir=} {size=} {itype=} {nr=} {fd=} {ret=}", os.readlink(f"/proc/self/fd/{fd}") if fd >= 0 else "") + print(f"{(st-start)*1000:7.2f} ms +{et*1000.:7.2f} ms : ioctl", + f"{idir=} {size=} {itype=} {nr=} {fd=} {ret=}", os.readlink(f"/proc/self/fd/{fd}") if fd >= 0 else "") return ret install_hook(libc.ioctl, ioctl) diff --git a/extra/hip_gpu_driver/kfd_ioctl.h b/extra/hip_gpu_driver/kfd_ioctl.h deleted file mode 100644 index d3e6cee06d..0000000000 --- a/extra/hip_gpu_driver/kfd_ioctl.h +++ /dev/null @@ -1,1671 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef KFD_IOCTL_H_INCLUDED -#define KFD_IOCTL_H_INCLUDED - -#include -#include - -/* - * - 1.1 - initial version - * - 1.3 - Add SMI events support - * - 1.4 - Indicate new SRAM EDC bit in device properties - * - 1.5 - Add SVM API - * - 1.6 - Query clear flags in SVM get_attr API - * - 1.7 - Checkpoint Restore (CRIU) API - * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs - * - 1.9 - Add available_memory ioctl - * - 1.10 - Add SMI profiler event log - * - 1.11 - Add unified memory for ctx save/restore area - * - 1.12 - Add DMA buf export ioctl - * - 1.13 - Add debugger API - * - 1.14 - Update kfd_event_data - */ -#define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 14 - -struct kfd_ioctl_get_version_args { - __u32 major_version; /* from KFD */ - __u32 minor_version; /* from KFD */ -}; - -/* For kfd_ioctl_create_queue_args.queue_type. */ -#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 -#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 -#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 -#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 - -#define KFD_MAX_QUEUE_PERCENTAGE 100 -#define KFD_MAX_QUEUE_PRIORITY 15 - -struct kfd_ioctl_create_queue_args { - __u64 ring_base_address; /* to KFD */ - __u64 write_pointer_address; /* from KFD */ - __u64 read_pointer_address; /* from KFD */ - __u64 doorbell_offset; /* from KFD */ - - __u32 ring_size; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 queue_type; /* to KFD */ - __u32 queue_percentage; /* to KFD */ - __u32 queue_priority; /* to KFD */ - __u32 queue_id; /* from KFD */ - - __u64 eop_buffer_address; /* to KFD */ - __u64 eop_buffer_size; /* to KFD */ - __u64 ctx_save_restore_address; /* to KFD */ - __u32 ctx_save_restore_size; /* to KFD */ - __u32 ctl_stack_size; /* to KFD */ -}; - -struct kfd_ioctl_destroy_queue_args { - __u32 queue_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_update_queue_args { - __u64 ring_base_address; /* to KFD */ - - __u32 queue_id; /* to KFD */ - __u32 ring_size; /* to KFD */ - __u32 queue_percentage; /* to KFD */ - __u32 queue_priority; /* to KFD */ -}; - -struct kfd_ioctl_set_cu_mask_args { - __u32 queue_id; /* to KFD */ - __u32 num_cu_mask; /* to KFD */ - __u64 cu_mask_ptr; /* to KFD */ -}; - -struct kfd_ioctl_get_queue_wave_state_args { - __u64 ctl_stack_address; /* to KFD */ - __u32 ctl_stack_used_size; /* from KFD */ - __u32 save_area_used_size; /* from KFD */ - __u32 queue_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_queue_snapshot_entry { - __u64 exception_status; - __u64 ring_base_address; - __u64 write_pointer_address; - __u64 read_pointer_address; - __u64 ctx_save_restore_address; - __u32 queue_id; - __u32 gpu_id; - __u32 ring_size; - __u32 queue_type; - __u32 ctx_save_restore_area_size; - __u32 reserved; -}; - -struct kfd_dbg_device_info_entry { - __u64 exception_status; - __u64 lds_base; - __u64 lds_limit; - __u64 scratch_base; - __u64 scratch_limit; - __u64 gpuvm_base; - __u64 gpuvm_limit; - __u32 gpu_id; - __u32 location_id; - __u32 vendor_id; - __u32 device_id; - __u32 revision_id; - __u32 subsystem_vendor_id; - __u32 subsystem_device_id; - __u32 fw_version; - __u32 gfx_target_version; - __u32 simd_count; - __u32 max_waves_per_simd; - __u32 array_count; - __u32 simd_arrays_per_engine; - __u32 num_xcc; - __u32 capability; - __u32 debug_prop; -}; - -/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ -#define KFD_IOC_CACHE_POLICY_COHERENT 0 -#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 - -struct kfd_ioctl_set_memory_policy_args { - __u64 alternate_aperture_base; /* to KFD */ - __u64 alternate_aperture_size; /* to KFD */ - - __u32 gpu_id; /* to KFD */ - __u32 default_policy; /* to KFD */ - __u32 alternate_policy; /* to KFD */ - __u32 pad; -}; - -/* - * All counters are monotonic. They are used for profiling of compute jobs. - * The profiling is done by userspace. - * - * In case of GPU reset, the counter should not be affected. - */ - -struct kfd_ioctl_get_clock_counters_args { - __u64 gpu_clock_counter; /* from KFD */ - __u64 cpu_clock_counter; /* from KFD */ - __u64 system_clock_counter; /* from KFD */ - __u64 system_clock_freq; /* from KFD */ - - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_process_device_apertures { - __u64 lds_base; /* from KFD */ - __u64 lds_limit; /* from KFD */ - __u64 scratch_base; /* from KFD */ - __u64 scratch_limit; /* from KFD */ - __u64 gpuvm_base; /* from KFD */ - __u64 gpuvm_limit; /* from KFD */ - __u32 gpu_id; /* from KFD */ - __u32 pad; -}; - -/* - * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use - * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an - * unlimited number of GPUs. - */ -#define NUM_OF_SUPPORTED_GPUS 7 -struct kfd_ioctl_get_process_apertures_args { - struct kfd_process_device_apertures - process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ - - /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ - __u32 num_of_nodes; - __u32 pad; -}; - -struct kfd_ioctl_get_process_apertures_new_args { - /* User allocated. Pointer to struct kfd_process_device_apertures - * filled in by Kernel - */ - __u64 kfd_process_device_apertures_ptr; - /* to KFD - indicates amount of memory present in - * kfd_process_device_apertures_ptr - * from KFD - Number of entries filled by KFD. - */ - __u32 num_of_nodes; - __u32 pad; -}; - -#define MAX_ALLOWED_NUM_POINTS 100 -#define MAX_ALLOWED_AW_BUFF_SIZE 4096 -#define MAX_ALLOWED_WAC_BUFF_SIZE 128 - -struct kfd_ioctl_dbg_register_args { - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_dbg_unregister_args { - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_dbg_address_watch_args { - __u64 content_ptr; /* a pointer to the actual content */ - __u32 gpu_id; /* to KFD */ - __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ -}; - -struct kfd_ioctl_dbg_wave_control_args { - __u64 content_ptr; /* a pointer to the actual content */ - __u32 gpu_id; /* to KFD */ - __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ -}; -#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1 - -/* queue states for suspend/resume */ -#define KFD_DBG_QUEUE_ERROR_BIT 30 -#define KFD_DBG_QUEUE_INVALID_BIT 31 -#define KFD_DBG_QUEUE_ERROR_MASK (1 << KFD_DBG_QUEUE_ERROR_BIT) -#define KFD_DBG_QUEUE_INVALID_MASK (1 << KFD_DBG_QUEUE_INVALID_BIT) - -#define KFD_INVALID_GPUID 0xffffffff -#define KFD_INVALID_QUEUEID 0xffffffff -#define KFD_INVALID_FD 0xffffffff - -enum kfd_dbg_trap_override_mode { - KFD_DBG_TRAP_OVERRIDE_OR = 0, - KFD_DBG_TRAP_OVERRIDE_REPLACE = 1 -}; -enum kfd_dbg_trap_mask { - KFD_DBG_TRAP_MASK_FP_INVALID = 1, - KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2, - KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4, - KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8, - KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16, - KFD_DBG_TRAP_MASK_FP_INEXACT = 32, - KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64, - KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH = 128, - KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256, - KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START = (1 << 30), - KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END = (1 << 31) -}; - -/* Wave launch modes */ -enum kfd_dbg_trap_wave_launch_mode { - KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL = 0, - KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT = 1, - KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG = 3 -}; - -/* Address watch modes */ -enum kfd_dbg_trap_address_watch_mode { - KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ = 0, - KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD = 1, - KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC = 2, - KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL = 3 -}; - -/* Additional wave settings */ -enum kfd_dbg_trap_flags { - KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1, -}; - -enum kfd_dbg_trap_exception_code { - EC_NONE = 0, - /* per queue */ - EC_QUEUE_WAVE_ABORT = 1, - EC_QUEUE_WAVE_TRAP = 2, - EC_QUEUE_WAVE_MATH_ERROR = 3, - EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION = 4, - EC_QUEUE_WAVE_MEMORY_VIOLATION = 5, - EC_QUEUE_WAVE_APERTURE_VIOLATION = 6, - EC_QUEUE_PACKET_DISPATCH_DIM_INVALID = 16, - EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17, - EC_QUEUE_PACKET_DISPATCH_CODE_INVALID = 18, - EC_QUEUE_PACKET_RESERVED = 19, - EC_QUEUE_PACKET_UNSUPPORTED = 20, - EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID = 21, - EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID = 22, - EC_QUEUE_PACKET_VENDOR_UNSUPPORTED = 23, - EC_QUEUE_PREEMPTION_ERROR = 30, - EC_QUEUE_NEW = 31, - /* per device */ - EC_DEVICE_QUEUE_DELETE = 32, - EC_DEVICE_MEMORY_VIOLATION = 33, - EC_DEVICE_RAS_ERROR = 34, - EC_DEVICE_FATAL_HALT = 35, - EC_DEVICE_NEW = 36, - /* per process */ - EC_PROCESS_RUNTIME = 48, - EC_PROCESS_DEVICE_REMOVE = 49, - EC_MAX -}; - -/* Mask generated by ecode defined in enum above. */ -#define KFD_EC_MASK(ecode) (1ULL << (ecode - 1)) - -/* Masks for exception code type checks below. */ -#define KFD_EC_MASK_QUEUE (KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) | \ - KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | \ - KFD_EC_MASK(EC_QUEUE_WAVE_MATH_ERROR) | \ - KFD_EC_MASK(EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION) | \ - KFD_EC_MASK(EC_QUEUE_WAVE_MEMORY_VIOLATION) | \ - KFD_EC_MASK(EC_QUEUE_WAVE_APERTURE_VIOLATION) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ - KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED) | \ - KFD_EC_MASK(EC_QUEUE_PREEMPTION_ERROR) | \ - KFD_EC_MASK(EC_QUEUE_NEW)) -#define KFD_EC_MASK_DEVICE (KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE) | \ - KFD_EC_MASK(EC_DEVICE_RAS_ERROR) | \ - KFD_EC_MASK(EC_DEVICE_FATAL_HALT) | \ - KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION) | \ - KFD_EC_MASK(EC_DEVICE_NEW)) -#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \ - KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE)) - -/* Checks for exception code types for KFD search. */ -#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) -#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) -#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) - -enum kfd_dbg_runtime_state { - DEBUG_RUNTIME_STATE_DISABLED = 0, - DEBUG_RUNTIME_STATE_ENABLED = 1, - DEBUG_RUNTIME_STATE_ENABLED_BUSY = 2, - DEBUG_RUNTIME_STATE_ENABLED_ERROR = 3 -}; - -struct kfd_runtime_info { - __u64 r_debug; - __u32 runtime_state; - __u32 ttmp_setup; -}; - -/* Enable modes for runtime enable */ -#define KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK 1 -#define KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK 2 -#define KFD_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK 0x80000000 - -/** - * kfd_ioctl_runtime_enable_args - Arguments for runtime enable - * - * Coordinates debug exception signalling and debug device enablement with runtime. - * - * @r_debug - pointer to user struct for sharing information between ROCr and the debuggger - * @mode_mask - mask to set mode - * KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK - enable runtime for debugging, otherwise disable - * KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK - enable trap temporary setup (ignore on disable) - * - * Return - 0 on SUCCESS. - * - EBUSY if runtime enable call already pending. - * - EEXIST if user queues already active prior to call. - * If process is debug enabled, runtime enable will enable debug devices and - * wait for debugger process to send runtime exception EC_PROCESS_RUNTIME - * to unblock - see kfd_ioctl_dbg_trap_args. - * - */ -struct kfd_ioctl_runtime_enable_args { - __u64 r_debug; - __u32 mode_mask; - __u32 capabilities_mask; -}; - -/* Context save area header information */ -struct kfd_context_save_area_header { - struct { - __u32 control_stack_offset; - __u32 control_stack_size; - __u32 wave_state_offset; - __u32 wave_state_size; - } wave_state; - __u32 debug_offset; - __u32 debug_size; - __u64 err_payload_addr; - __u32 err_event_id; - __u32 reserved1; -}; - -/* - * Debug operations - * - * For specifics on usage and return values, see documentation per operation - * below. Otherwise, generic error returns apply: - * - ESRCH if the process to debug does not exist. - * - * - EINVAL (with KFD_IOC_DBG_TRAP_ENABLE exempt) if operation - * KFD_IOC_DBG_TRAP_ENABLE has not succeeded prior. - * Also returns this error if GPU hardware scheduling is not supported. - * - * - EPERM (with KFD_IOC_DBG_TRAP_DISABLE exempt) if target process is not - * PTRACE_ATTACHED. KFD_IOC_DBG_TRAP_DISABLE is exempt to allow - * clean up of debug mode as long as process is debug enabled. - * - * - EACCES if any DBG_HW_OP (debug hardware operation) is requested when - * AMDKFD_IOC_RUNTIME_ENABLE has not succeeded prior. - * - * - ENODEV if any GPU does not support debugging on a DBG_HW_OP call. - * - * - Other errors may be returned when a DBG_HW_OP occurs while the GPU - * is in a fatal state. - * - */ -enum kfd_dbg_trap_operations { - KFD_IOC_DBG_TRAP_ENABLE = 0, - KFD_IOC_DBG_TRAP_DISABLE = 1, - KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT = 2, - KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED = 3, - KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE = 4, /* DBG_HW_OP */ - KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE = 5, /* DBG_HW_OP */ - KFD_IOC_DBG_TRAP_SUSPEND_QUEUES = 6, /* DBG_HW_OP */ - KFD_IOC_DBG_TRAP_RESUME_QUEUES = 7, /* DBG_HW_OP */ - KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH = 8, /* DBG_HW_OP */ - KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH = 9, /* DBG_HW_OP */ - KFD_IOC_DBG_TRAP_SET_FLAGS = 10, - KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT = 11, - KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO = 12, - KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT = 13, - KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT = 14 -}; - -/** - * kfd_ioctl_dbg_trap_enable_args - * - * Arguments for KFD_IOC_DBG_TRAP_ENABLE. - * - * Enables debug session for target process. Call @op KFD_IOC_DBG_TRAP_DISABLE in - * kfd_ioctl_dbg_trap_args to disable debug session. - * - * @exception_mask (IN) - exceptions to raise to the debugger - * @rinfo_ptr (IN) - pointer to runtime info buffer (see kfd_runtime_info) - * @rinfo_size (IN/OUT) - size of runtime info buffer in bytes - * @dbg_fd (IN) - fd the KFD will nofify the debugger with of raised - * exceptions set in exception_mask. - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * Copies KFD saved kfd_runtime_info to @rinfo_ptr on enable. - * Size of kfd_runtime saved by the KFD returned to @rinfo_size. - * - EBADF if KFD cannot get a reference to dbg_fd. - * - EFAULT if KFD cannot copy runtime info to rinfo_ptr. - * - EINVAL if target process is already debug enabled. - * - */ -struct kfd_ioctl_dbg_trap_enable_args { - __u64 exception_mask; - __u64 rinfo_ptr; - __u32 rinfo_size; - __u32 dbg_fd; -}; - -/** - * kfd_ioctl_dbg_trap_send_runtime_event_args - * - * - * Arguments for KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT. - * Raises exceptions to runtime. - * - * @exception_mask (IN) - exceptions to raise to runtime - * @gpu_id (IN) - target device id - * @queue_id (IN) - target queue id - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * - ENODEV if gpu_id not found. - * If exception_mask contains EC_PROCESS_RUNTIME, unblocks pending - * AMDKFD_IOC_RUNTIME_ENABLE call - see kfd_ioctl_runtime_enable_args. - * All other exceptions are raised to runtime through err_payload_addr. - * See kfd_context_save_area_header. - */ -struct kfd_ioctl_dbg_trap_send_runtime_event_args { - __u64 exception_mask; - __u32 gpu_id; - __u32 queue_id; -}; - -/** - * kfd_ioctl_dbg_trap_set_exceptions_enabled_args - * - * Arguments for KFD_IOC_SET_EXCEPTIONS_ENABLED - * Set new exceptions to be raised to the debugger. - * - * @exception_mask (IN) - new exceptions to raise the debugger - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - */ -struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args { - __u64 exception_mask; -}; - -/** - * kfd_ioctl_dbg_trap_set_wave_launch_override_args - * - * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE - * Enable HW exceptions to raise trap. - * - * @override_mode (IN) - see kfd_dbg_trap_override_mode - * @enable_mask (IN/OUT) - reference kfd_dbg_trap_mask. - * IN is the override modes requested to be enabled. - * OUT is referenced in Return below. - * @support_request_mask (IN/OUT) - reference kfd_dbg_trap_mask. - * IN is the override modes requested for support check. - * OUT is referenced in Return below. - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * Previous enablement is returned in @enable_mask. - * Actual override support is returned in @support_request_mask. - * - EINVAL if override mode is not supported. - * - EACCES if trap support requested is not actually supported. - * i.e. enable_mask (IN) is not a subset of support_request_mask (OUT). - * Otherwise it is considered a generic error (see kfd_dbg_trap_operations). - */ -struct kfd_ioctl_dbg_trap_set_wave_launch_override_args { - __u32 override_mode; - __u32 enable_mask; - __u32 support_request_mask; - __u32 pad; -}; - -/** - * kfd_ioctl_dbg_trap_set_wave_launch_mode_args - * - * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE - * Set wave launch mode. - * - * @mode (IN) - see kfd_dbg_trap_wave_launch_mode - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - */ -struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args { - __u32 launch_mode; - __u32 pad; -}; - -/** - * kfd_ioctl_dbg_trap_suspend_queues_ags - * - * Arguments for KFD_IOC_DBG_TRAP_SUSPEND_QUEUES - * Suspend queues. - * - * @exception_mask (IN) - raised exceptions to clear - * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id) - * to suspend - * @num_queues (IN) - number of queues to suspend in @queue_array_ptr - * @grace_period (IN) - wave time allowance before preemption - * per 1K GPU clock cycle unit - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Destruction of a suspended queue is blocked until the queue is - * resumed. This allows the debugger to access queue information and - * the its context save area without running into a race condition on - * queue destruction. - * Automatically copies per queue context save area header information - * into the save area base - * (see kfd_queue_snapshot_entry and kfd_context_save_area_header). - * - * Return - Number of queues suspended on SUCCESS. - * . KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK masked - * for each queue id in @queue_array_ptr array reports unsuccessful - * suspend reason. - * KFD_DBG_QUEUE_ERROR_MASK = HW failure. - * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist, is new or - * is being destroyed. - */ -struct kfd_ioctl_dbg_trap_suspend_queues_args { - __u64 exception_mask; - __u64 queue_array_ptr; - __u32 num_queues; - __u32 grace_period; -}; - -/** - * kfd_ioctl_dbg_trap_resume_queues_args - * - * Arguments for KFD_IOC_DBG_TRAP_RESUME_QUEUES - * Resume queues. - * - * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id) - * to resume - * @num_queues (IN) - number of queues to resume in @queue_array_ptr - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - Number of queues resumed on SUCCESS. - * KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK mask - * for each queue id in @queue_array_ptr array reports unsuccessful - * resume reason. - * KFD_DBG_QUEUE_ERROR_MASK = HW failure. - * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist. - */ -struct kfd_ioctl_dbg_trap_resume_queues_args { - __u64 queue_array_ptr; - __u32 num_queues; - __u32 pad; -}; - -/** - * kfd_ioctl_dbg_trap_set_node_address_watch_args - * - * Arguments for KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH - * Sets address watch for device. - * - * @address (IN) - watch address to set - * @mode (IN) - see kfd_dbg_trap_address_watch_mode - * @mask (IN) - watch address mask - * @gpu_id (IN) - target gpu to set watch point - * @id (OUT) - watch id allocated - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * Allocated watch ID returned to @id. - * - ENODEV if gpu_id not found. - * - ENOMEM if watch IDs can be allocated - */ -struct kfd_ioctl_dbg_trap_set_node_address_watch_args { - __u64 address; - __u32 mode; - __u32 mask; - __u32 gpu_id; - __u32 id; -}; - -/** - * kfd_ioctl_dbg_trap_clear_node_address_watch_args - * - * Arguments for KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH - * Clear address watch for device. - * - * @gpu_id (IN) - target device to clear watch point - * @id (IN) - allocated watch id to clear - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * - ENODEV if gpu_id not found. - * - EINVAL if watch ID has not been allocated. - */ -struct kfd_ioctl_dbg_trap_clear_node_address_watch_args { - __u32 gpu_id; - __u32 id; -}; - -/** - * kfd_ioctl_dbg_trap_set_flags_args - * - * Arguments for KFD_IOC_DBG_TRAP_SET_FLAGS - * Sets flags for wave behaviour. - * - * @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * - EACCESS if any debug device does not allow flag options. - */ -struct kfd_ioctl_dbg_trap_set_flags_args { - __u32 flags; - __u32 pad; -}; - -/** - * kfd_ioctl_dbg_trap_query_debug_event_args - * - * Arguments for KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT - * - * Find one or more raised exceptions. This function can return multiple - * exceptions from a single queue or a single device with one call. To find - * all raised exceptions, this function must be called repeatedly until it - * returns -EAGAIN. Returned exceptions can optionally be cleared by - * setting the corresponding bit in the @exception_mask input parameter. - * However, clearing an exception prevents retrieving further information - * about it with KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO. - * - * @exception_mask (IN/OUT) - exception to clear (IN) and raised (OUT) - * @gpu_id (OUT) - gpu id of exceptions raised - * @queue_id (OUT) - queue id of exceptions raised - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on raised exception found - * Raised exceptions found are returned in @exception mask - * with reported source id returned in @gpu_id or @queue_id. - * - EAGAIN if no raised exception has been found - */ -struct kfd_ioctl_dbg_trap_query_debug_event_args { - __u64 exception_mask; - __u32 gpu_id; - __u32 queue_id; -}; - -/** - * kfd_ioctl_dbg_trap_query_exception_info_args - * - * Arguments KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO - * Get additional info on raised exception. - * - * @info_ptr (IN) - pointer to exception info buffer to copy to - * @info_size (IN/OUT) - exception info buffer size (bytes) - * @source_id (IN) - target gpu or queue id - * @exception_code (IN) - target exception - * @clear_exception (IN) - clear raised @exception_code exception - * (0 = false, 1 = true) - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * If @exception_code is EC_DEVICE_MEMORY_VIOLATION, copy @info_size(OUT) - * bytes of memory exception data to @info_ptr. - * If @exception_code is EC_PROCESS_RUNTIME, copy saved - * kfd_runtime_info to @info_ptr. - * Actual required @info_ptr size (bytes) is returned in @info_size. - */ -struct kfd_ioctl_dbg_trap_query_exception_info_args { - __u64 info_ptr; - __u32 info_size; - __u32 source_id; - __u32 exception_code; - __u32 clear_exception; -}; - -/** - * kfd_ioctl_dbg_trap_get_queue_snapshot_args - * - * Arguments KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT - * Get queue information. - * - * @exception_mask (IN) - exceptions raised to clear - * @snapshot_buf_ptr (IN) - queue snapshot entry buffer (see kfd_queue_snapshot_entry) - * @num_queues (IN/OUT) - number of queue snapshot entries - * The debugger specifies the size of the array allocated in @num_queues. - * KFD returns the number of queues that actually existed. If this is - * larger than the size specified by the debugger, KFD will not overflow - * the array allocated by the debugger. - * - * @entry_size (IN/OUT) - size per entry in bytes - * The debugger specifies sizeof(struct kfd_queue_snapshot_entry) in - * @entry_size. KFD returns the number of bytes actually populated per - * entry. The debugger should use the KFD_IOCTL_MINOR_VERSION to determine, - * which fields in struct kfd_queue_snapshot_entry are valid. This allows - * growing the ABI in a backwards compatible manner. - * Note that entry_size(IN) should still be used to stride the snapshot buffer in the - * event that it's larger than actual kfd_queue_snapshot_entry. - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * Copies @num_queues(IN) queue snapshot entries of size @entry_size(IN) - * into @snapshot_buf_ptr if @num_queues(IN) > 0. - * Otherwise return @num_queues(OUT) queue snapshot entries that exist. - */ -struct kfd_ioctl_dbg_trap_queue_snapshot_args { - __u64 exception_mask; - __u64 snapshot_buf_ptr; - __u32 num_queues; - __u32 entry_size; -}; - -/** - * kfd_ioctl_dbg_trap_get_device_snapshot_args - * - * Arguments for KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT - * Get device information. - * - * @exception_mask (IN) - exceptions raised to clear - * @snapshot_buf_ptr (IN) - pointer to snapshot buffer (see kfd_dbg_device_info_entry) - * @num_devices (IN/OUT) - number of debug devices to snapshot - * The debugger specifies the size of the array allocated in @num_devices. - * KFD returns the number of devices that actually existed. If this is - * larger than the size specified by the debugger, KFD will not overflow - * the array allocated by the debugger. - * - * @entry_size (IN/OUT) - size per entry in bytes - * The debugger specifies sizeof(struct kfd_dbg_device_info_entry) in - * @entry_size. KFD returns the number of bytes actually populated. The - * debugger should use KFD_IOCTL_MINOR_VERSION to determine, which fields - * in struct kfd_dbg_device_info_entry are valid. This allows growing the - * ABI in a backwards compatible manner. - * Note that entry_size(IN) should still be used to stride the snapshot buffer in the - * event that it's larger than actual kfd_dbg_device_info_entry. - * - * Generic errors apply (see kfd_dbg_trap_operations). - * Return - 0 on SUCCESS. - * Copies @num_devices(IN) device snapshot entries of size @entry_size(IN) - * into @snapshot_buf_ptr if @num_devices(IN) > 0. - * Otherwise return @num_devices(OUT) queue snapshot entries that exist. - */ -struct kfd_ioctl_dbg_trap_device_snapshot_args { - __u64 exception_mask; - __u64 snapshot_buf_ptr; - __u32 num_devices; - __u32 entry_size; -}; - -/** - * kfd_ioctl_dbg_trap_args - * - * Arguments to debug target process. - * - * @pid - target process to debug - * @op - debug operation (see kfd_dbg_trap_operations) - * - * @op determines which union struct args to use. - * Refer to kern docs for each kfd_ioctl_dbg_trap_*_args struct. - */ -struct kfd_ioctl_dbg_trap_args { - __u32 pid; - __u32 op; - - union { - struct kfd_ioctl_dbg_trap_enable_args enable; - struct kfd_ioctl_dbg_trap_send_runtime_event_args send_runtime_event; - struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args set_exceptions_enabled; - struct kfd_ioctl_dbg_trap_set_wave_launch_override_args launch_override; - struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args launch_mode; - struct kfd_ioctl_dbg_trap_suspend_queues_args suspend_queues; - struct kfd_ioctl_dbg_trap_resume_queues_args resume_queues; - struct kfd_ioctl_dbg_trap_set_node_address_watch_args set_node_address_watch; - struct kfd_ioctl_dbg_trap_clear_node_address_watch_args clear_node_address_watch; - struct kfd_ioctl_dbg_trap_set_flags_args set_flags; - struct kfd_ioctl_dbg_trap_query_debug_event_args query_debug_event; - struct kfd_ioctl_dbg_trap_query_exception_info_args query_exception_info; - struct kfd_ioctl_dbg_trap_queue_snapshot_args queue_snapshot; - struct kfd_ioctl_dbg_trap_device_snapshot_args device_snapshot; - }; -}; - -/* Matching HSA_EVENTTYPE */ -#define KFD_IOC_EVENT_SIGNAL 0 -#define KFD_IOC_EVENT_NODECHANGE 1 -#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 -#define KFD_IOC_EVENT_HW_EXCEPTION 3 -#define KFD_IOC_EVENT_SYSTEM_EVENT 4 -#define KFD_IOC_EVENT_DEBUG_EVENT 5 -#define KFD_IOC_EVENT_PROFILE_EVENT 6 -#define KFD_IOC_EVENT_QUEUE_EVENT 7 -#define KFD_IOC_EVENT_MEMORY 8 - -#define KFD_IOC_WAIT_RESULT_COMPLETE 0 -#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 -#define KFD_IOC_WAIT_RESULT_FAIL 2 - -#define KFD_SIGNAL_EVENT_LIMIT 4096 - -/* For kfd_event_data.hw_exception_data.reset_type. */ -#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0 -#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 - -/* For kfd_event_data.hw_exception_data.reset_cause. */ -#define KFD_HW_EXCEPTION_GPU_HANG 0 -#define KFD_HW_EXCEPTION_ECC 1 - -/* For kfd_hsa_memory_exception_data.ErrorType */ -#define KFD_MEM_ERR_NO_RAS 0 -#define KFD_MEM_ERR_SRAM_ECC 1 -#define KFD_MEM_ERR_POISON_CONSUMED 2 -#define KFD_MEM_ERR_GPU_HANG 3 - -struct kfd_ioctl_create_event_args { - __u64 event_page_offset; /* from KFD */ - __u32 event_trigger_data; /* from KFD - signal events only */ - __u32 event_type; /* to KFD */ - __u32 auto_reset; /* to KFD */ - __u32 node_id; /* to KFD - only valid for certain - event types */ - __u32 event_id; /* from KFD */ - __u32 event_slot_index; /* from KFD */ -}; - -struct kfd_ioctl_destroy_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_set_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_reset_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_memory_exception_failure { - __u32 NotPresent; /* Page not present or supervisor privilege */ - __u32 ReadOnly; /* Write access to a read-only page */ - __u32 NoExecute; /* Execute access to a page marked NX */ - __u32 imprecise; /* Can't determine the exact fault address */ -}; - -/* memory exception data */ -struct kfd_hsa_memory_exception_data { - struct kfd_memory_exception_failure failure; - __u64 va; - __u32 gpu_id; - __u32 ErrorType; /* 0 = no RAS error, - * 1 = ECC_SRAM, - * 2 = Link_SYNFLOOD (poison), - * 3 = GPU hang (not attributable to a specific cause), - * other values reserved - */ -}; - -/* hw exception data */ -struct kfd_hsa_hw_exception_data { - __u32 reset_type; - __u32 reset_cause; - __u32 memory_lost; - __u32 gpu_id; -}; - -/* hsa signal event data */ -struct kfd_hsa_signal_event_data { - __u64 last_event_age; /* to and from KFD */ -}; - -/* Event data */ -struct kfd_event_data { - union { - /* From KFD */ - struct kfd_hsa_memory_exception_data memory_exception_data; - struct kfd_hsa_hw_exception_data hw_exception_data; - /* To and From KFD */ - struct kfd_hsa_signal_event_data signal_event_data; - }; - __u64 kfd_event_data_ext; /* pointer to an extension structure - for future exception types */ - __u32 event_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_wait_events_args { - __u64 events_ptr; /* pointed to struct - kfd_event_data array, to KFD */ - __u32 num_events; /* to KFD */ - __u32 wait_for_all; /* to KFD */ - __u32 timeout; /* to KFD */ - __u32 wait_result; /* from KFD */ -}; - -struct kfd_ioctl_set_scratch_backing_va_args { - __u64 va_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_get_tile_config_args { - /* to KFD: pointer to tile array */ - __u64 tile_config_ptr; - /* to KFD: pointer to macro tile array */ - __u64 macro_tile_config_ptr; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_tile_configs; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_macro_tile_configs; - - __u32 gpu_id; /* to KFD */ - __u32 gb_addr_config; /* from KFD */ - __u32 num_banks; /* from KFD */ - __u32 num_ranks; /* from KFD */ - /* struct size can be extended later if needed - * without breaking ABI compatibility - */ -}; - -struct kfd_ioctl_set_trap_handler_args { - __u64 tba_addr; /* to KFD */ - __u64 tma_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_acquire_vm_args { - __u32 drm_fd; /* to KFD */ - __u32 gpu_id; /* to KFD */ -}; - -/* Allocation flags: memory types */ -#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) -#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) -#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) -#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) -#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) -/* Allocation flags: attributes/access options */ -#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) -#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) -#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) -#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) -#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) -#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) -#define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED (1 << 25) -#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT (1 << 24) - -/* Allocate memory for later SVM (shared virtual memory) mapping. - * - * @va_addr: virtual address of the memory to be allocated - * all later mappings on all GPUs will use this address - * @size: size in bytes - * @handle: buffer handle returned to user mode, used to refer to - * this allocation for mapping, unmapping and freeing - * @mmap_offset: for CPU-mapping the allocation by mmapping a render node - * for userptrs this is overloaded to specify the CPU address - * @gpu_id: device identifier - * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above - */ -struct kfd_ioctl_alloc_memory_of_gpu_args { - __u64 va_addr; /* to KFD */ - __u64 size; /* to KFD */ - __u64 handle; /* from KFD */ - __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ - __u32 gpu_id; /* to KFD */ - __u32 flags; -}; - -/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu - * - * @handle: memory handle returned by alloc - */ -struct kfd_ioctl_free_memory_of_gpu_args { - __u64 handle; /* to KFD */ -}; - -/* Inquire available memory with kfd_ioctl_get_available_memory - * - * @available: memory available for alloc - */ -struct kfd_ioctl_get_available_memory_args { - __u64 available; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -/* Map memory to one or more GPUs - * - * @handle: memory handle returned by alloc - * @device_ids_array_ptr: array of gpu_ids (__u32 per device) - * @n_devices: number of devices in the array - * @n_success: number of devices mapped successfully - * - * @n_success returns information to the caller how many devices from - * the start of the array have mapped the buffer successfully. It can - * be passed into a subsequent retry call to skip those devices. For - * the first call the caller should initialize it to 0. - * - * If the ioctl completes with return code 0 (success), n_success == - * n_devices. - */ -struct kfd_ioctl_map_memory_to_gpu_args { - __u64 handle; /* to KFD */ - __u64 device_ids_array_ptr; /* to KFD */ - __u32 n_devices; /* to KFD */ - __u32 n_success; /* to/from KFD */ -}; - -/* Unmap memory from one or more GPUs - * - * same arguments as for mapping - */ -struct kfd_ioctl_unmap_memory_from_gpu_args { - __u64 handle; /* to KFD */ - __u64 device_ids_array_ptr; /* to KFD */ - __u32 n_devices; /* to KFD */ - __u32 n_success; /* to/from KFD */ -}; - -/* Allocate GWS for specific queue - * - * @queue_id: queue's id that GWS is allocated for - * @num_gws: how many GWS to allocate - * @first_gws: index of the first GWS allocated. - * only support contiguous GWS allocation - */ -struct kfd_ioctl_alloc_queue_gws_args { - __u32 queue_id; /* to KFD */ - __u32 num_gws; /* to KFD */ - __u32 first_gws; /* from KFD */ - __u32 pad; -}; - -struct kfd_ioctl_get_dmabuf_info_args { - __u64 size; /* from KFD */ - __u64 metadata_ptr; /* to KFD */ - __u32 metadata_size; /* to KFD (space allocated by user) - * from KFD (actual metadata size) - */ - __u32 gpu_id; /* from KFD */ - __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ - __u32 dmabuf_fd; /* to KFD */ -}; - -struct kfd_ioctl_import_dmabuf_args { - __u64 va_addr; /* to KFD */ - __u64 handle; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 dmabuf_fd; /* to KFD */ -}; - -struct kfd_ioctl_export_dmabuf_args { - __u64 handle; /* to KFD */ - __u32 flags; /* to KFD */ - __u32 dmabuf_fd; /* from KFD */ -}; - -/* - * KFD SMI(System Management Interface) events - */ -enum kfd_smi_event { - KFD_SMI_EVENT_NONE = 0, /* not used */ - KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ - KFD_SMI_EVENT_THERMAL_THROTTLE = 2, - KFD_SMI_EVENT_GPU_PRE_RESET = 3, - KFD_SMI_EVENT_GPU_POST_RESET = 4, -}; - -#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) -#define KFD_SMI_EVENT_MSG_SIZE 96 - -struct kfd_ioctl_smi_events_args { - __u32 gpuid; /* to KFD */ - __u32 anon_fd; /* from KFD */ -}; - -/** - * kfd_ioctl_spm_op - SPM ioctl operations - * - * @KFD_IOCTL_SPM_OP_ACQUIRE: acquire exclusive access to SPM - * @KFD_IOCTL_SPM_OP_RELEASE: release exclusive access to SPM - * @KFD_IOCTL_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming - */ -enum kfd_ioctl_spm_op { - KFD_IOCTL_SPM_OP_ACQUIRE, - KFD_IOCTL_SPM_OP_RELEASE, - KFD_IOCTL_SPM_OP_SET_DEST_BUF -}; - -/** - * kfd_ioctl_spm_args - Arguments for SPM ioctl - * - * @op[in]: specifies the operation to perform - * @gpu_id[in]: GPU ID of the GPU to profile - * @dst_buf[in]: used for the address of the destination buffer - * in @KFD_IOCTL_SPM_SET_DEST_BUFFER - * @buf_size[in]: size of the destination buffer - * @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left - * `in the timeout window - * @bytes_copied[out]: amount of data that was copied to the previous dest_buf - * @has_data_loss: boolean indicating whether data was lost - * (e.g. due to a ring-buffer overflow) - * - * This ioctl performs different functions depending on the @op parameter. - * - * KFD_IOCTL_SPM_OP_ACQUIRE - * ------------------------ - * - * Acquires exclusive access of SPM on the specified @gpu_id for the calling process. - * This must be called before using KFD_IOCTL_SPM_OP_SET_DEST_BUF. - * - * KFD_IOCTL_SPM_OP_RELEASE - * ------------------------ - * - * Releases exclusive access of SPM on the specified @gpu_id for the calling process, - * which allows another process to acquire it in the future. - * - * KFD_IOCTL_SPM_OP_SET_DEST_BUF - * ----------------------------- - * - * If @dst_buf is NULL, the destination buffer address is unset and copying of counters - * is stopped. - * - * If @dst_buf is not NULL, it specifies the pointer to a new destination buffer. - * @buf_size specifies the size of the buffer. - * - * If @timeout is non-0, the call will wait for up to @timeout ms for the previous - * buffer to be filled. If previous buffer to be filled before timeout, the @timeout - * will be updated value with the time remaining. If the timeout is exceeded, the function - * copies any partial data available into the previous user buffer and returns success. - * The amount of valid data in the previous user buffer is indicated by @bytes_copied. - * - * If @timeout is 0, the function immediately replaces the previous destination buffer - * without waiting for the previous buffer to be filled. That means the previous buffer - * may only be partially filled, and @bytes_copied will indicate how much data has been - * copied to it. - * - * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0. - * - * Returns negative error code on failure, 0 on success. - */ -struct kfd_ioctl_spm_args { - __u64 dest_buf; - __u32 buf_size; - __u32 op; - __u32 timeout; - __u32 gpu_id; - __u32 bytes_copied; - __u32 has_data_loss; -}; - -/************************************************************************************************** - * CRIU IOCTLs (Checkpoint Restore In Userspace) - * - * When checkpointing a process, the userspace application will perform: - * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts - * all the queues. - * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) - * 3. UNPAUSE op to un-evict all the queues - * - * When restoring a process, the CRIU userspace application will perform: - * - * 1. RESTORE op to restore process contents - * 2. RESUME op to start the process - * - * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User - * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. - */ - -enum kfd_criu_op { - KFD_CRIU_OP_PROCESS_INFO, - KFD_CRIU_OP_CHECKPOINT, - KFD_CRIU_OP_UNPAUSE, - KFD_CRIU_OP_RESTORE, - KFD_CRIU_OP_RESUME, -}; - -/** - * kfd_ioctl_criu_args - Arguments perform CRIU operation - * @devices: [in/out] User pointer to memory location for devices information. - * This is an array of type kfd_criu_device_bucket. - * @bos: [in/out] User pointer to memory location for BOs information - * This is an array of type kfd_criu_bo_bucket. - * @priv_data: [in/out] User pointer to memory location for private data - * @priv_data_size: [in/out] Size of priv_data in bytes - * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. - * @num_bos [in/out] Number of BOs used by process. Size of @bos array. - * @num_objects: [in/out] Number of objects used by process. Objects are opaque to - * user application. - * @pid: [in/out] PID of the process being checkpointed - * @op [in] Type of operation (kfd_criu_op) - * - * Return: 0 on success, -errno on failure - */ -struct kfd_ioctl_criu_args { - __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ - __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ - __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ - __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ - __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ - __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ - __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ - __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ - __u32 op; -}; - -struct kfd_criu_device_bucket { - __u32 user_gpu_id; - __u32 actual_gpu_id; - __u32 drm_fd; - __u32 pad; -}; - -struct kfd_criu_bo_bucket { - __u64 addr; - __u64 size; - __u64 offset; - __u64 restored_offset; /* During restore, updated offset for BO */ - __u32 gpu_id; /* This is the user_gpu_id */ - __u32 alloc_flags; - __u32 dmabuf_fd; - __u32 pad; -}; - -/* CRIU IOCTLs - END */ -/**************************************************************************************************/ -/* Register offset inside the remapped mmio page - */ -enum kfd_mmio_remap { - KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, - KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, -}; - -struct kfd_ioctl_ipc_export_handle_args { - __u64 handle; /* to KFD */ - __u32 share_handle[4]; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 flags; /* to KFD */ -}; - -struct kfd_ioctl_ipc_import_handle_args { - __u64 handle; /* from KFD */ - __u64 va_addr; /* to KFD */ - __u64 mmap_offset; /* from KFD */ - __u32 share_handle[4]; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 flags; /* from KFD */ -}; - -struct kfd_memory_range { - __u64 va_addr; - __u64 size; -}; - -/* flags definitions - * BIT0: 0: read operation, 1: write operation. - * This also identifies if the src or dst array belongs to remote process - */ -#define KFD_CROSS_MEMORY_RW_BIT (1 << 0) -#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT) -#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT) -#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) - -struct kfd_ioctl_cross_memory_copy_args { - /* to KFD: Process ID of the remote process */ - __u32 pid; - /* to KFD: See above definition */ - __u32 flags; - /* to KFD: Source GPU VM range */ - __u64 src_mem_range_array; - /* to KFD: Size of above array */ - __u64 src_mem_array_size; - /* to KFD: Destination GPU VM range */ - __u64 dst_mem_range_array; - /* to KFD: Size of above array */ - __u64 dst_mem_array_size; - /* from KFD: Total amount of bytes copied */ - __u64 bytes_copied; -}; - -/* Guarantee host access to memory */ -#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 -/* Fine grained coherency between all devices with access */ -#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002 -/* Use any GPU in same hive as preferred device */ -#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004 -/* GPUs only read, allows replication */ -#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008 -/* Allow execution on GPU */ -#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010 -/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */ -#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020 -/* Keep GPU memory mapping always valid as if XNACK is disable */ -#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040 -/* Fine grained coherency between all devices using device-scope atomics */ -#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT 0x00000080 - -/** - * kfd_ioctl_svm_op - SVM ioctl operations - * - * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes - * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes - */ -enum kfd_ioctl_svm_op { - KFD_IOCTL_SVM_OP_SET_ATTR, - KFD_IOCTL_SVM_OP_GET_ATTR -}; - -/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations - * - * GPU IDs are used to specify GPUs as preferred and prefetch locations. - * Below definitions are used for system memory or for leaving the preferred - * location unspecified. - */ -enum kfd_ioctl_svm_location { - KFD_IOCTL_SVM_LOCATION_SYSMEM = 0, - KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff -}; - -/** - * kfd_ioctl_svm_attr_type - SVM attribute types - * - * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for - * system memory - * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for - * system memory. Setting this triggers an - * immediate prefetch (migration). - * @KFD_IOCTL_SVM_ATTR_ACCESS: - * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: - * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given - * by the attribute value - * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see - * KFD_IOCTL_SVM_FLAG_...) - * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear - * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity - * (log2 num pages) - */ -enum kfd_ioctl_svm_attr_type { - KFD_IOCTL_SVM_ATTR_PREFERRED_LOC, - KFD_IOCTL_SVM_ATTR_PREFETCH_LOC, - KFD_IOCTL_SVM_ATTR_ACCESS, - KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE, - KFD_IOCTL_SVM_ATTR_NO_ACCESS, - KFD_IOCTL_SVM_ATTR_SET_FLAGS, - KFD_IOCTL_SVM_ATTR_CLR_FLAGS, - KFD_IOCTL_SVM_ATTR_GRANULARITY -}; - -/** - * kfd_ioctl_svm_attribute - Attributes as pairs of type and value - * - * The meaning of the @value depends on the attribute type. - * - * @type: attribute type (see enum @kfd_ioctl_svm_attr_type) - * @value: attribute value - */ -struct kfd_ioctl_svm_attribute { - __u32 type; - __u32 value; -}; - -/** - * kfd_ioctl_svm_args - Arguments for SVM ioctl - * - * @op specifies the operation to perform (see enum - * @kfd_ioctl_svm_op). @start_addr and @size are common for all - * operations. - * - * A variable number of attributes can be given in @attrs. - * @nattr specifies the number of attributes. New attributes can be - * added in the future without breaking the ABI. If unknown attributes - * are given, the function returns -EINVAL. - * - * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address - * range. It may overlap existing virtual address ranges. If it does, - * the existing ranges will be split such that the attribute changes - * only apply to the specified address range. - * - * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes - * over all memory in the given range and returns the result as the - * attribute value. If different pages have different preferred or - * prefetch locations, 0xffffffff will be returned for - * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or - * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC resepctively. For - * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be - * aggregated by bitwise AND. That means, a flag will be set in the - * output, if that flag is set for all pages in the range. For - * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be - * aggregated by bitwise NOR. That means, a flag will be set in the - * output, if that flag is clear for all pages in the range. - * The minimum migration granularity throughout the range will be - * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY. - * - * Querying of accessibility attributes works by initializing the - * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the - * GPUID being queried. Multiple attributes can be given to allow - * querying multiple GPUIDs. The ioctl function overwrites the - * attribute type to indicate the access for the specified GPU. - */ -struct kfd_ioctl_svm_args { - __u64 start_addr; - __u64 size; - __u32 op; - __u32 nattr; - /* Variable length array of attributes */ - struct kfd_ioctl_svm_attribute attrs[]; -}; - -/** - * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode - * - * @xnack_enabled: [in/out] Whether to enable XNACK mode for this process - * - * @xnack_enabled indicates whether recoverable page faults should be - * enabled for the current process. 0 means disabled, positive means - * enabled, negative means leave unchanged. If enabled, virtual address - * translations on GFXv9 and later AMD GPUs can return XNACK and retry - * the access until a valid PTE is available. This is used to implement - * device page faults. - * - * On output, @xnack_enabled returns the (new) current mode (0 or - * positive). Therefore, a negative input value can be used to query - * the current mode without changing it. - * - * The XNACK mode fundamentally changes the way SVM managed memory works - * in the driver, with subtle effects on application performance and - * functionality. - * - * Enabling XNACK mode requires shader programs to be compiled - * differently. Furthermore, not all GPUs support changing the mode - * per-process. Therefore changing the mode is only allowed while no - * user mode queues exist in the process. This ensure that no shader - * code is running that may be compiled for the wrong mode. And GPUs - * that cannot change to the requested mode will prevent the XNACK - * mode from occurring. All GPUs used by the process must be in the - * same XNACK mode. - * - * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM. - * Therefore those GPUs are not considered for the XNACK mode switch. - * - * Return: 0 on success, -errno on failure - */ -struct kfd_ioctl_set_xnack_mode_args { - __s32 xnack_enabled; -}; - -#define AMDKFD_IOCTL_BASE 'K' -#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) -#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) - -#define AMDKFD_IOC_GET_VERSION \ - AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args) - -#define AMDKFD_IOC_CREATE_QUEUE \ - AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) - -#define AMDKFD_IOC_DESTROY_QUEUE \ - AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args) - -#define AMDKFD_IOC_SET_MEMORY_POLICY \ - AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) - -#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ - AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) - -#define AMDKFD_IOC_GET_PROCESS_APERTURES \ - AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) - -#define AMDKFD_IOC_UPDATE_QUEUE \ - AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) - -#define AMDKFD_IOC_CREATE_EVENT \ - AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) - -#define AMDKFD_IOC_DESTROY_EVENT \ - AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) - -#define AMDKFD_IOC_SET_EVENT \ - AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) - -#define AMDKFD_IOC_RESET_EVENT \ - AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) - -#define AMDKFD_IOC_WAIT_EVENTS \ - AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) - -#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ - AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) - -#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ - AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) - -#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ - AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) - -#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ - AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) - -#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ - AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) - -#define AMDKFD_IOC_GET_TILE_CONFIG \ - AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) - -#define AMDKFD_IOC_SET_TRAP_HANDLER \ - AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) - -#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ - AMDKFD_IOWR(0x14, \ - struct kfd_ioctl_get_process_apertures_new_args) - -#define AMDKFD_IOC_ACQUIRE_VM \ - AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) - -#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ - AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) - -#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ - AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) - -#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ - AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) - -#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ - AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) - -#define AMDKFD_IOC_SET_CU_MASK \ - AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) - -#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ - AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) - -#define AMDKFD_IOC_GET_DMABUF_INFO \ - AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) - -#define AMDKFD_IOC_IMPORT_DMABUF \ - AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) - -#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ - AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) - -#define AMDKFD_IOC_SMI_EVENTS \ - AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) - -#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) - -#define AMDKFD_IOC_SET_XNACK_MODE \ - AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) - -#define AMDKFD_IOC_CRIU_OP \ - AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) - -#define AMDKFD_IOC_AVAILABLE_MEMORY \ - AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args) - -#define AMDKFD_IOC_EXPORT_DMABUF \ - AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args) - -#define AMDKFD_IOC_RUNTIME_ENABLE \ - AMDKFD_IOWR(0x25, struct kfd_ioctl_runtime_enable_args) - -#define AMDKFD_IOC_DBG_TRAP \ - AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args) - -#define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x27 - -/* non-upstream ioctls */ -#define AMDKFD_IOC_IPC_IMPORT_HANDLE \ - AMDKFD_IOWR(0x80, struct kfd_ioctl_ipc_import_handle_args) - -#define AMDKFD_IOC_IPC_EXPORT_HANDLE \ - AMDKFD_IOWR(0x81, struct kfd_ioctl_ipc_export_handle_args) - -#define AMDKFD_IOC_CROSS_MEMORY_COPY \ - AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_args) - -#define AMDKFD_IOC_RLC_SPM \ - AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args) - - -#define AMDKFD_COMMAND_START_2 0x80 -#define AMDKFD_COMMAND_END_2 0x85 - -#endif diff --git a/extra/hip_gpu_driver/kfd_ioctl.h b/extra/hip_gpu_driver/kfd_ioctl.h new file mode 120000 index 0000000000..a0a34b6c88 --- /dev/null +++ b/extra/hip_gpu_driver/kfd_ioctl.h @@ -0,0 +1 @@ +/usr/include/linux/kfd_ioctl.h \ No newline at end of file diff --git a/extra/hip_gpu_driver/kfd_ioctl.py b/extra/hip_gpu_driver/kfd_ioctl.py deleted file mode 100644 index 22567b82e1..0000000000 --- a/extra/hip_gpu_driver/kfd_ioctl.py +++ /dev/null @@ -1,1359 +0,0 @@ -# -*- coding: utf-8 -*- -# -# TARGET arch is: [] -# WORD_SIZE is: 8 -# POINTER_SIZE is: 8 -# LONGDOUBLE_SIZE is: 16 -# -import ctypes - - -class AsDictMixin: - @classmethod - def as_dict(cls, self): - result = {} - if not isinstance(self, AsDictMixin): - # not a structure, assume it's already a python object - return self - if not hasattr(cls, "_fields_"): - return result - # sys.version_info >= (3, 5) - # for (field, *_) in cls._fields_: # noqa - for field_tuple in cls._fields_: # noqa - field = field_tuple[0] - if field.startswith('PADDING_'): - continue - value = getattr(self, field) - type_ = type(value) - if hasattr(value, "_length_") and hasattr(value, "_type_"): - # array - if not hasattr(type_, "as_dict"): - value = [v for v in value] - else: - type_ = type_._type_ - value = [type_.as_dict(v) for v in value] - elif hasattr(value, "contents") and hasattr(value, "_type_"): - # pointer - try: - if not hasattr(type_, "as_dict"): - value = value.contents - else: - type_ = type_._type_ - value = type_.as_dict(value.contents) - except ValueError: - # nullptr - value = None - elif isinstance(value, AsDictMixin): - # other structure - value = type_.as_dict(value) - result[field] = value - return result - - -class Structure(ctypes.Structure, AsDictMixin): - - def __init__(self, *args, **kwds): - # We don't want to use positional arguments fill PADDING_* fields - - args = dict(zip(self.__class__._field_names_(), args)) - args.update(kwds) - super(Structure, self).__init__(**args) - - @classmethod - def _field_names_(cls): - if hasattr(cls, '_fields_'): - return (f[0] for f in cls._fields_ if not f[0].startswith('PADDING')) - else: - return () - - @classmethod - def get_type(cls, field): - for f in cls._fields_: - if f[0] == field: - return f[1] - return None - - @classmethod - def bind(cls, bound_fields): - fields = {} - for name, type_ in cls._fields_: - if hasattr(type_, "restype"): - if name in bound_fields: - if bound_fields[name] is None: - fields[name] = type_() - else: - # use a closure to capture the callback from the loop scope - fields[name] = ( - type_((lambda callback: lambda *args: callback(*args))( - bound_fields[name])) - ) - del bound_fields[name] - else: - # default callback implementation (does nothing) - try: - default_ = type_(0).restype().value - except TypeError: - default_ = None - fields[name] = type_(( - lambda default_: lambda *args: default_)(default_)) - else: - # not a callback function, use default initialization - if name in bound_fields: - fields[name] = bound_fields[name] - del bound_fields[name] - else: - fields[name] = type_() - if len(bound_fields) != 0: - raise ValueError( - "Cannot bind the following unknown callback(s) {}.{}".format( - cls.__name__, bound_fields.keys() - )) - return cls(**fields) - - -class Union(ctypes.Union, AsDictMixin): - pass - - - - - -class struct_kfd_ioctl_get_version_args(Structure): - pass - -struct_kfd_ioctl_get_version_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_version_args._fields_ = [ - ('major_version', ctypes.c_uint32), - ('minor_version', ctypes.c_uint32), -] - -class struct_kfd_ioctl_create_queue_args(Structure): - pass - -struct_kfd_ioctl_create_queue_args._pack_ = 1 # source:False -struct_kfd_ioctl_create_queue_args._fields_ = [ - ('ring_base_address', ctypes.c_uint64), - ('write_pointer_address', ctypes.c_uint64), - ('read_pointer_address', ctypes.c_uint64), - ('doorbell_offset', ctypes.c_uint64), - ('ring_size', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), - ('queue_type', ctypes.c_uint32), - ('queue_percentage', ctypes.c_uint32), - ('queue_priority', ctypes.c_uint32), - ('queue_id', ctypes.c_uint32), - ('eop_buffer_address', ctypes.c_uint64), - ('eop_buffer_size', ctypes.c_uint64), - ('ctx_save_restore_address', ctypes.c_uint64), - ('ctx_save_restore_size', ctypes.c_uint32), - ('ctl_stack_size', ctypes.c_uint32), -] - -class struct_kfd_ioctl_destroy_queue_args(Structure): - pass - -struct_kfd_ioctl_destroy_queue_args._pack_ = 1 # source:False -struct_kfd_ioctl_destroy_queue_args._fields_ = [ - ('queue_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_update_queue_args(Structure): - pass - -struct_kfd_ioctl_update_queue_args._pack_ = 1 # source:False -struct_kfd_ioctl_update_queue_args._fields_ = [ - ('ring_base_address', ctypes.c_uint64), - ('queue_id', ctypes.c_uint32), - ('ring_size', ctypes.c_uint32), - ('queue_percentage', ctypes.c_uint32), - ('queue_priority', ctypes.c_uint32), -] - -class struct_kfd_ioctl_set_cu_mask_args(Structure): - pass - -struct_kfd_ioctl_set_cu_mask_args._pack_ = 1 # source:False -struct_kfd_ioctl_set_cu_mask_args._fields_ = [ - ('queue_id', ctypes.c_uint32), - ('num_cu_mask', ctypes.c_uint32), - ('cu_mask_ptr', ctypes.c_uint64), -] - -class struct_kfd_ioctl_get_queue_wave_state_args(Structure): - pass - -struct_kfd_ioctl_get_queue_wave_state_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_queue_wave_state_args._fields_ = [ - ('ctl_stack_address', ctypes.c_uint64), - ('ctl_stack_used_size', ctypes.c_uint32), - ('save_area_used_size', ctypes.c_uint32), - ('queue_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_queue_snapshot_entry(Structure): - pass - -struct_kfd_queue_snapshot_entry._pack_ = 1 # source:False -struct_kfd_queue_snapshot_entry._fields_ = [ - ('exception_status', ctypes.c_uint64), - ('ring_base_address', ctypes.c_uint64), - ('write_pointer_address', ctypes.c_uint64), - ('read_pointer_address', ctypes.c_uint64), - ('ctx_save_restore_address', ctypes.c_uint64), - ('queue_id', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), - ('ring_size', ctypes.c_uint32), - ('queue_type', ctypes.c_uint32), - ('ctx_save_restore_area_size', ctypes.c_uint32), - ('reserved', ctypes.c_uint32), -] - -class struct_kfd_dbg_device_info_entry(Structure): - pass - -struct_kfd_dbg_device_info_entry._pack_ = 1 # source:False -struct_kfd_dbg_device_info_entry._fields_ = [ - ('exception_status', ctypes.c_uint64), - ('lds_base', ctypes.c_uint64), - ('lds_limit', ctypes.c_uint64), - ('scratch_base', ctypes.c_uint64), - ('scratch_limit', ctypes.c_uint64), - ('gpuvm_base', ctypes.c_uint64), - ('gpuvm_limit', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('location_id', ctypes.c_uint32), - ('vendor_id', ctypes.c_uint32), - ('device_id', ctypes.c_uint32), - ('revision_id', ctypes.c_uint32), - ('subsystem_vendor_id', ctypes.c_uint32), - ('subsystem_device_id', ctypes.c_uint32), - ('fw_version', ctypes.c_uint32), - ('gfx_target_version', ctypes.c_uint32), - ('simd_count', ctypes.c_uint32), - ('max_waves_per_simd', ctypes.c_uint32), - ('array_count', ctypes.c_uint32), - ('simd_arrays_per_engine', ctypes.c_uint32), - ('num_xcc', ctypes.c_uint32), - ('capability', ctypes.c_uint32), - ('debug_prop', ctypes.c_uint32), -] - -class struct_kfd_ioctl_set_memory_policy_args(Structure): - pass - -struct_kfd_ioctl_set_memory_policy_args._pack_ = 1 # source:False -struct_kfd_ioctl_set_memory_policy_args._fields_ = [ - ('alternate_aperture_base', ctypes.c_uint64), - ('alternate_aperture_size', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('default_policy', ctypes.c_uint32), - ('alternate_policy', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_get_clock_counters_args(Structure): - pass - -struct_kfd_ioctl_get_clock_counters_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_clock_counters_args._fields_ = [ - ('gpu_clock_counter', ctypes.c_uint64), - ('cpu_clock_counter', ctypes.c_uint64), - ('system_clock_counter', ctypes.c_uint64), - ('system_clock_freq', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_process_device_apertures(Structure): - pass - -struct_kfd_process_device_apertures._pack_ = 1 # source:False -struct_kfd_process_device_apertures._fields_ = [ - ('lds_base', ctypes.c_uint64), - ('lds_limit', ctypes.c_uint64), - ('scratch_base', ctypes.c_uint64), - ('scratch_limit', ctypes.c_uint64), - ('gpuvm_base', ctypes.c_uint64), - ('gpuvm_limit', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_get_process_apertures_args(Structure): - pass - -struct_kfd_ioctl_get_process_apertures_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_process_apertures_args._fields_ = [ - ('process_apertures', struct_kfd_process_device_apertures * 7), - ('num_of_nodes', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_get_process_apertures_new_args(Structure): - pass - -struct_kfd_ioctl_get_process_apertures_new_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_process_apertures_new_args._fields_ = [ - ('kfd_process_device_apertures_ptr', ctypes.c_uint64), - ('num_of_nodes', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_register_args(Structure): - pass - -struct_kfd_ioctl_dbg_register_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_register_args._fields_ = [ - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_unregister_args(Structure): - pass - -struct_kfd_ioctl_dbg_unregister_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_unregister_args._fields_ = [ - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_address_watch_args(Structure): - pass - -struct_kfd_ioctl_dbg_address_watch_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_address_watch_args._fields_ = [ - ('content_ptr', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('buf_size_in_bytes', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_wave_control_args(Structure): - pass - -struct_kfd_ioctl_dbg_wave_control_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_wave_control_args._fields_ = [ - ('content_ptr', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('buf_size_in_bytes', ctypes.c_uint32), -] - - -# values for enumeration 'kfd_dbg_trap_override_mode' -kfd_dbg_trap_override_mode__enumvalues = { - 0: 'KFD_DBG_TRAP_OVERRIDE_OR', - 1: 'KFD_DBG_TRAP_OVERRIDE_REPLACE', -} -KFD_DBG_TRAP_OVERRIDE_OR = 0 -KFD_DBG_TRAP_OVERRIDE_REPLACE = 1 -kfd_dbg_trap_override_mode = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_dbg_trap_mask' -kfd_dbg_trap_mask__enumvalues = { - 1: 'KFD_DBG_TRAP_MASK_FP_INVALID', - 2: 'KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL', - 4: 'KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO', - 8: 'KFD_DBG_TRAP_MASK_FP_OVERFLOW', - 16: 'KFD_DBG_TRAP_MASK_FP_UNDERFLOW', - 32: 'KFD_DBG_TRAP_MASK_FP_INEXACT', - 64: 'KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO', - 128: 'KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH', - 256: 'KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION', - 1073741824: 'KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START', - -2147483648: 'KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END', -} -KFD_DBG_TRAP_MASK_FP_INVALID = 1 -KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2 -KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4 -KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8 -KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16 -KFD_DBG_TRAP_MASK_FP_INEXACT = 32 -KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64 -KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH = 128 -KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256 -KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START = 1073741824 -KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END = -2147483648 -kfd_dbg_trap_mask = ctypes.c_int32 # enum - -# values for enumeration 'kfd_dbg_trap_wave_launch_mode' -kfd_dbg_trap_wave_launch_mode__enumvalues = { - 0: 'KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL', - 1: 'KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT', - 3: 'KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG', -} -KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL = 0 -KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT = 1 -KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG = 3 -kfd_dbg_trap_wave_launch_mode = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_dbg_trap_address_watch_mode' -kfd_dbg_trap_address_watch_mode__enumvalues = { - 0: 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ', - 1: 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD', - 2: 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC', - 3: 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL', -} -KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ = 0 -KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD = 1 -KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC = 2 -KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL = 3 -kfd_dbg_trap_address_watch_mode = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_dbg_trap_flags' -kfd_dbg_trap_flags__enumvalues = { - 1: 'KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP', -} -KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1 -kfd_dbg_trap_flags = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_dbg_trap_exception_code' -kfd_dbg_trap_exception_code__enumvalues = { - 0: 'EC_NONE', - 1: 'EC_QUEUE_WAVE_ABORT', - 2: 'EC_QUEUE_WAVE_TRAP', - 3: 'EC_QUEUE_WAVE_MATH_ERROR', - 4: 'EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION', - 5: 'EC_QUEUE_WAVE_MEMORY_VIOLATION', - 6: 'EC_QUEUE_WAVE_APERTURE_VIOLATION', - 16: 'EC_QUEUE_PACKET_DISPATCH_DIM_INVALID', - 17: 'EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID', - 18: 'EC_QUEUE_PACKET_DISPATCH_CODE_INVALID', - 19: 'EC_QUEUE_PACKET_RESERVED', - 20: 'EC_QUEUE_PACKET_UNSUPPORTED', - 21: 'EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID', - 22: 'EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID', - 23: 'EC_QUEUE_PACKET_VENDOR_UNSUPPORTED', - 30: 'EC_QUEUE_PREEMPTION_ERROR', - 31: 'EC_QUEUE_NEW', - 32: 'EC_DEVICE_QUEUE_DELETE', - 33: 'EC_DEVICE_MEMORY_VIOLATION', - 34: 'EC_DEVICE_RAS_ERROR', - 35: 'EC_DEVICE_FATAL_HALT', - 36: 'EC_DEVICE_NEW', - 48: 'EC_PROCESS_RUNTIME', - 49: 'EC_PROCESS_DEVICE_REMOVE', - 50: 'EC_MAX', -} -EC_NONE = 0 -EC_QUEUE_WAVE_ABORT = 1 -EC_QUEUE_WAVE_TRAP = 2 -EC_QUEUE_WAVE_MATH_ERROR = 3 -EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION = 4 -EC_QUEUE_WAVE_MEMORY_VIOLATION = 5 -EC_QUEUE_WAVE_APERTURE_VIOLATION = 6 -EC_QUEUE_PACKET_DISPATCH_DIM_INVALID = 16 -EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17 -EC_QUEUE_PACKET_DISPATCH_CODE_INVALID = 18 -EC_QUEUE_PACKET_RESERVED = 19 -EC_QUEUE_PACKET_UNSUPPORTED = 20 -EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID = 21 -EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID = 22 -EC_QUEUE_PACKET_VENDOR_UNSUPPORTED = 23 -EC_QUEUE_PREEMPTION_ERROR = 30 -EC_QUEUE_NEW = 31 -EC_DEVICE_QUEUE_DELETE = 32 -EC_DEVICE_MEMORY_VIOLATION = 33 -EC_DEVICE_RAS_ERROR = 34 -EC_DEVICE_FATAL_HALT = 35 -EC_DEVICE_NEW = 36 -EC_PROCESS_RUNTIME = 48 -EC_PROCESS_DEVICE_REMOVE = 49 -EC_MAX = 50 -kfd_dbg_trap_exception_code = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_dbg_runtime_state' -kfd_dbg_runtime_state__enumvalues = { - 0: 'DEBUG_RUNTIME_STATE_DISABLED', - 1: 'DEBUG_RUNTIME_STATE_ENABLED', - 2: 'DEBUG_RUNTIME_STATE_ENABLED_BUSY', - 3: 'DEBUG_RUNTIME_STATE_ENABLED_ERROR', -} -DEBUG_RUNTIME_STATE_DISABLED = 0 -DEBUG_RUNTIME_STATE_ENABLED = 1 -DEBUG_RUNTIME_STATE_ENABLED_BUSY = 2 -DEBUG_RUNTIME_STATE_ENABLED_ERROR = 3 -kfd_dbg_runtime_state = ctypes.c_uint32 # enum -class struct_kfd_runtime_info(Structure): - pass - -struct_kfd_runtime_info._pack_ = 1 # source:False -struct_kfd_runtime_info._fields_ = [ - ('r_debug', ctypes.c_uint64), - ('runtime_state', ctypes.c_uint32), - ('ttmp_setup', ctypes.c_uint32), -] - -class struct_kfd_ioctl_runtime_enable_args(Structure): - pass - -struct_kfd_ioctl_runtime_enable_args._pack_ = 1 # source:False -struct_kfd_ioctl_runtime_enable_args._fields_ = [ - ('r_debug', ctypes.c_uint64), - ('mode_mask', ctypes.c_uint32), - ('capabilities_mask', ctypes.c_uint32), -] - -class struct_kfd_context_save_area_header(Structure): - pass - -class struct_kfd_context_save_area_header_wave_state(Structure): - pass - -struct_kfd_context_save_area_header_wave_state._pack_ = 1 # source:False -struct_kfd_context_save_area_header_wave_state._fields_ = [ - ('control_stack_offset', ctypes.c_uint32), - ('control_stack_size', ctypes.c_uint32), - ('wave_state_offset', ctypes.c_uint32), - ('wave_state_size', ctypes.c_uint32), -] - -struct_kfd_context_save_area_header._pack_ = 1 # source:False -struct_kfd_context_save_area_header._fields_ = [ - ('wave_state', struct_kfd_context_save_area_header_wave_state), - ('debug_offset', ctypes.c_uint32), - ('debug_size', ctypes.c_uint32), - ('err_payload_addr', ctypes.c_uint64), - ('err_event_id', ctypes.c_uint32), - ('reserved1', ctypes.c_uint32), -] - - -# values for enumeration 'kfd_dbg_trap_operations' -kfd_dbg_trap_operations__enumvalues = { - 0: 'KFD_IOC_DBG_TRAP_ENABLE', - 1: 'KFD_IOC_DBG_TRAP_DISABLE', - 2: 'KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT', - 3: 'KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED', - 4: 'KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE', - 5: 'KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE', - 6: 'KFD_IOC_DBG_TRAP_SUSPEND_QUEUES', - 7: 'KFD_IOC_DBG_TRAP_RESUME_QUEUES', - 8: 'KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH', - 9: 'KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH', - 10: 'KFD_IOC_DBG_TRAP_SET_FLAGS', - 11: 'KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT', - 12: 'KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO', - 13: 'KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT', - 14: 'KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT', -} -KFD_IOC_DBG_TRAP_ENABLE = 0 -KFD_IOC_DBG_TRAP_DISABLE = 1 -KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT = 2 -KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED = 3 -KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE = 4 -KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE = 5 -KFD_IOC_DBG_TRAP_SUSPEND_QUEUES = 6 -KFD_IOC_DBG_TRAP_RESUME_QUEUES = 7 -KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH = 8 -KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH = 9 -KFD_IOC_DBG_TRAP_SET_FLAGS = 10 -KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT = 11 -KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO = 12 -KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT = 13 -KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT = 14 -kfd_dbg_trap_operations = ctypes.c_uint32 # enum -class struct_kfd_ioctl_dbg_trap_enable_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_enable_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_enable_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), - ('rinfo_ptr', ctypes.c_uint64), - ('rinfo_size', ctypes.c_uint32), - ('dbg_fd', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_send_runtime_event_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_send_runtime_event_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_send_runtime_event_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('queue_id', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_set_exceptions_enabled_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_set_exceptions_enabled_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_set_exceptions_enabled_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), -] - -class struct_kfd_ioctl_dbg_trap_set_wave_launch_override_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_set_wave_launch_override_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_set_wave_launch_override_args._fields_ = [ - ('override_mode', ctypes.c_uint32), - ('enable_mask', ctypes.c_uint32), - ('support_request_mask', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_set_wave_launch_mode_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_set_wave_launch_mode_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_set_wave_launch_mode_args._fields_ = [ - ('launch_mode', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_suspend_queues_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_suspend_queues_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_suspend_queues_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), - ('queue_array_ptr', ctypes.c_uint64), - ('num_queues', ctypes.c_uint32), - ('grace_period', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_resume_queues_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_resume_queues_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_resume_queues_args._fields_ = [ - ('queue_array_ptr', ctypes.c_uint64), - ('num_queues', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_set_node_address_watch_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_set_node_address_watch_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_set_node_address_watch_args._fields_ = [ - ('address', ctypes.c_uint64), - ('mode', ctypes.c_uint32), - ('mask', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), - ('id', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_clear_node_address_watch_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_clear_node_address_watch_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_clear_node_address_watch_args._fields_ = [ - ('gpu_id', ctypes.c_uint32), - ('id', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_set_flags_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_set_flags_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_set_flags_args._fields_ = [ - ('flags', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_query_debug_event_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_query_debug_event_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_query_debug_event_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('queue_id', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_query_exception_info_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_query_exception_info_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_query_exception_info_args._fields_ = [ - ('info_ptr', ctypes.c_uint64), - ('info_size', ctypes.c_uint32), - ('source_id', ctypes.c_uint32), - ('exception_code', ctypes.c_uint32), - ('clear_exception', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_queue_snapshot_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_queue_snapshot_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_queue_snapshot_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), - ('snapshot_buf_ptr', ctypes.c_uint64), - ('num_queues', ctypes.c_uint32), - ('entry_size', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_device_snapshot_args(Structure): - pass - -struct_kfd_ioctl_dbg_trap_device_snapshot_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_device_snapshot_args._fields_ = [ - ('exception_mask', ctypes.c_uint64), - ('snapshot_buf_ptr', ctypes.c_uint64), - ('num_devices', ctypes.c_uint32), - ('entry_size', ctypes.c_uint32), -] - -class struct_kfd_ioctl_dbg_trap_args(Structure): - pass - -class union_kfd_ioctl_dbg_trap_args_0(Union): - _pack_ = 1 # source:False - _fields_ = [ - ('enable', struct_kfd_ioctl_dbg_trap_enable_args), - ('send_runtime_event', struct_kfd_ioctl_dbg_trap_send_runtime_event_args), - ('set_exceptions_enabled', struct_kfd_ioctl_dbg_trap_set_exceptions_enabled_args), - ('launch_override', struct_kfd_ioctl_dbg_trap_set_wave_launch_override_args), - ('launch_mode', struct_kfd_ioctl_dbg_trap_set_wave_launch_mode_args), - ('suspend_queues', struct_kfd_ioctl_dbg_trap_suspend_queues_args), - ('resume_queues', struct_kfd_ioctl_dbg_trap_resume_queues_args), - ('set_node_address_watch', struct_kfd_ioctl_dbg_trap_set_node_address_watch_args), - ('clear_node_address_watch', struct_kfd_ioctl_dbg_trap_clear_node_address_watch_args), - ('set_flags', struct_kfd_ioctl_dbg_trap_set_flags_args), - ('query_debug_event', struct_kfd_ioctl_dbg_trap_query_debug_event_args), - ('query_exception_info', struct_kfd_ioctl_dbg_trap_query_exception_info_args), - ('queue_snapshot', struct_kfd_ioctl_dbg_trap_queue_snapshot_args), - ('device_snapshot', struct_kfd_ioctl_dbg_trap_device_snapshot_args), - ] - -struct_kfd_ioctl_dbg_trap_args._pack_ = 1 # source:False -struct_kfd_ioctl_dbg_trap_args._anonymous_ = ('_0',) -struct_kfd_ioctl_dbg_trap_args._fields_ = [ - ('pid', ctypes.c_uint32), - ('op', ctypes.c_uint32), - ('_0', union_kfd_ioctl_dbg_trap_args_0), -] - -class struct_kfd_ioctl_create_event_args(Structure): - pass - -struct_kfd_ioctl_create_event_args._pack_ = 1 # source:False -struct_kfd_ioctl_create_event_args._fields_ = [ - ('event_page_offset', ctypes.c_uint64), - ('event_trigger_data', ctypes.c_uint32), - ('event_type', ctypes.c_uint32), - ('auto_reset', ctypes.c_uint32), - ('node_id', ctypes.c_uint32), - ('event_id', ctypes.c_uint32), - ('event_slot_index', ctypes.c_uint32), -] - -class struct_kfd_ioctl_destroy_event_args(Structure): - pass - -struct_kfd_ioctl_destroy_event_args._pack_ = 1 # source:False -struct_kfd_ioctl_destroy_event_args._fields_ = [ - ('event_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_set_event_args(Structure): - pass - -struct_kfd_ioctl_set_event_args._pack_ = 1 # source:False -struct_kfd_ioctl_set_event_args._fields_ = [ - ('event_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_reset_event_args(Structure): - pass - -struct_kfd_ioctl_reset_event_args._pack_ = 1 # source:False -struct_kfd_ioctl_reset_event_args._fields_ = [ - ('event_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_memory_exception_failure(Structure): - pass - -struct_kfd_memory_exception_failure._pack_ = 1 # source:False -struct_kfd_memory_exception_failure._fields_ = [ - ('NotPresent', ctypes.c_uint32), - ('ReadOnly', ctypes.c_uint32), - ('NoExecute', ctypes.c_uint32), - ('imprecise', ctypes.c_uint32), -] - -class struct_kfd_hsa_memory_exception_data(Structure): - pass - -struct_kfd_hsa_memory_exception_data._pack_ = 1 # source:False -struct_kfd_hsa_memory_exception_data._fields_ = [ - ('failure', struct_kfd_memory_exception_failure), - ('va', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('ErrorType', ctypes.c_uint32), -] - -class struct_kfd_hsa_hw_exception_data(Structure): - pass - -struct_kfd_hsa_hw_exception_data._pack_ = 1 # source:False -struct_kfd_hsa_hw_exception_data._fields_ = [ - ('reset_type', ctypes.c_uint32), - ('reset_cause', ctypes.c_uint32), - ('memory_lost', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), -] - -class struct_kfd_hsa_signal_event_data(Structure): - pass - -struct_kfd_hsa_signal_event_data._pack_ = 1 # source:False -struct_kfd_hsa_signal_event_data._fields_ = [ - ('last_event_age', ctypes.c_uint64), -] - -class struct_kfd_event_data(Structure): - pass - -class union_kfd_event_data_0(Union): - pass - -union_kfd_event_data_0._pack_ = 1 # source:False -union_kfd_event_data_0._fields_ = [ - ('memory_exception_data', struct_kfd_hsa_memory_exception_data), - ('hw_exception_data', struct_kfd_hsa_hw_exception_data), - ('signal_event_data', struct_kfd_hsa_signal_event_data), - ('PADDING_0', ctypes.c_ubyte * 24), -] - -struct_kfd_event_data._pack_ = 1 # source:False -struct_kfd_event_data._anonymous_ = ('_0',) -struct_kfd_event_data._fields_ = [ - ('_0', union_kfd_event_data_0), - ('kfd_event_data_ext', ctypes.c_uint64), - ('event_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_wait_events_args(Structure): - pass - -struct_kfd_ioctl_wait_events_args._pack_ = 1 # source:False -struct_kfd_ioctl_wait_events_args._fields_ = [ - ('events_ptr', ctypes.c_uint64), - ('num_events', ctypes.c_uint32), - ('wait_for_all', ctypes.c_uint32), - ('timeout', ctypes.c_uint32), - ('wait_result', ctypes.c_uint32), -] - -class struct_kfd_ioctl_set_scratch_backing_va_args(Structure): - pass - -struct_kfd_ioctl_set_scratch_backing_va_args._pack_ = 1 # source:False -struct_kfd_ioctl_set_scratch_backing_va_args._fields_ = [ - ('va_addr', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_get_tile_config_args(Structure): - pass - -struct_kfd_ioctl_get_tile_config_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_tile_config_args._fields_ = [ - ('tile_config_ptr', ctypes.c_uint64), - ('macro_tile_config_ptr', ctypes.c_uint64), - ('num_tile_configs', ctypes.c_uint32), - ('num_macro_tile_configs', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), - ('gb_addr_config', ctypes.c_uint32), - ('num_banks', ctypes.c_uint32), - ('num_ranks', ctypes.c_uint32), -] - -class struct_kfd_ioctl_set_trap_handler_args(Structure): - pass - -struct_kfd_ioctl_set_trap_handler_args._pack_ = 1 # source:False -struct_kfd_ioctl_set_trap_handler_args._fields_ = [ - ('tba_addr', ctypes.c_uint64), - ('tma_addr', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_acquire_vm_args(Structure): - pass - -struct_kfd_ioctl_acquire_vm_args._pack_ = 1 # source:False -struct_kfd_ioctl_acquire_vm_args._fields_ = [ - ('drm_fd', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), -] - -class struct_kfd_ioctl_alloc_memory_of_gpu_args(Structure): - pass - -struct_kfd_ioctl_alloc_memory_of_gpu_args._pack_ = 1 # source:False -struct_kfd_ioctl_alloc_memory_of_gpu_args._fields_ = [ - ('va_addr', ctypes.c_uint64), - ('size', ctypes.c_uint64), - ('handle', ctypes.c_uint64), - ('mmap_offset', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('flags', ctypes.c_uint32), -] - -class struct_kfd_ioctl_free_memory_of_gpu_args(Structure): - pass - -struct_kfd_ioctl_free_memory_of_gpu_args._pack_ = 1 # source:False -struct_kfd_ioctl_free_memory_of_gpu_args._fields_ = [ - ('handle', ctypes.c_uint64), -] - -class struct_kfd_ioctl_get_available_memory_args(Structure): - pass - -struct_kfd_ioctl_get_available_memory_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_available_memory_args._fields_ = [ - ('available', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_map_memory_to_gpu_args(Structure): - pass - -struct_kfd_ioctl_map_memory_to_gpu_args._pack_ = 1 # source:False -struct_kfd_ioctl_map_memory_to_gpu_args._fields_ = [ - ('handle', ctypes.c_uint64), - ('device_ids_array_ptr', ctypes.c_uint64), - ('n_devices', ctypes.c_uint32), - ('n_success', ctypes.c_uint32), -] - -class struct_kfd_ioctl_unmap_memory_from_gpu_args(Structure): - pass - -struct_kfd_ioctl_unmap_memory_from_gpu_args._pack_ = 1 # source:False -struct_kfd_ioctl_unmap_memory_from_gpu_args._fields_ = [ - ('handle', ctypes.c_uint64), - ('device_ids_array_ptr', ctypes.c_uint64), - ('n_devices', ctypes.c_uint32), - ('n_success', ctypes.c_uint32), -] - -class struct_kfd_ioctl_alloc_queue_gws_args(Structure): - pass - -struct_kfd_ioctl_alloc_queue_gws_args._pack_ = 1 # source:False -struct_kfd_ioctl_alloc_queue_gws_args._fields_ = [ - ('queue_id', ctypes.c_uint32), - ('num_gws', ctypes.c_uint32), - ('first_gws', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_ioctl_get_dmabuf_info_args(Structure): - pass - -struct_kfd_ioctl_get_dmabuf_info_args._pack_ = 1 # source:False -struct_kfd_ioctl_get_dmabuf_info_args._fields_ = [ - ('size', ctypes.c_uint64), - ('metadata_ptr', ctypes.c_uint64), - ('metadata_size', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), - ('flags', ctypes.c_uint32), - ('dmabuf_fd', ctypes.c_uint32), -] - -class struct_kfd_ioctl_import_dmabuf_args(Structure): - pass - -struct_kfd_ioctl_import_dmabuf_args._pack_ = 1 # source:False -struct_kfd_ioctl_import_dmabuf_args._fields_ = [ - ('va_addr', ctypes.c_uint64), - ('handle', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('dmabuf_fd', ctypes.c_uint32), -] - -class struct_kfd_ioctl_export_dmabuf_args(Structure): - pass - -struct_kfd_ioctl_export_dmabuf_args._pack_ = 1 # source:False -struct_kfd_ioctl_export_dmabuf_args._fields_ = [ - ('handle', ctypes.c_uint64), - ('flags', ctypes.c_uint32), - ('dmabuf_fd', ctypes.c_uint32), -] - - -# values for enumeration 'kfd_smi_event' -kfd_smi_event__enumvalues = { - 0: 'KFD_SMI_EVENT_NONE', - 1: 'KFD_SMI_EVENT_VMFAULT', - 2: 'KFD_SMI_EVENT_THERMAL_THROTTLE', - 3: 'KFD_SMI_EVENT_GPU_PRE_RESET', - 4: 'KFD_SMI_EVENT_GPU_POST_RESET', -} -KFD_SMI_EVENT_NONE = 0 -KFD_SMI_EVENT_VMFAULT = 1 -KFD_SMI_EVENT_THERMAL_THROTTLE = 2 -KFD_SMI_EVENT_GPU_PRE_RESET = 3 -KFD_SMI_EVENT_GPU_POST_RESET = 4 -kfd_smi_event = ctypes.c_uint32 # enum -class struct_kfd_ioctl_smi_events_args(Structure): - pass - -struct_kfd_ioctl_smi_events_args._pack_ = 1 # source:False -struct_kfd_ioctl_smi_events_args._fields_ = [ - ('gpuid', ctypes.c_uint32), - ('anon_fd', ctypes.c_uint32), -] - - -# values for enumeration 'kfd_ioctl_spm_op' -kfd_ioctl_spm_op__enumvalues = { - 0: 'KFD_IOCTL_SPM_OP_ACQUIRE', - 1: 'KFD_IOCTL_SPM_OP_RELEASE', - 2: 'KFD_IOCTL_SPM_OP_SET_DEST_BUF', -} -KFD_IOCTL_SPM_OP_ACQUIRE = 0 -KFD_IOCTL_SPM_OP_RELEASE = 1 -KFD_IOCTL_SPM_OP_SET_DEST_BUF = 2 -kfd_ioctl_spm_op = ctypes.c_uint32 # enum -class struct_kfd_ioctl_spm_args(Structure): - pass - -struct_kfd_ioctl_spm_args._pack_ = 1 # source:False -struct_kfd_ioctl_spm_args._fields_ = [ - ('dest_buf', ctypes.c_uint64), - ('buf_size', ctypes.c_uint32), - ('op', ctypes.c_uint32), - ('timeout', ctypes.c_uint32), - ('gpu_id', ctypes.c_uint32), - ('bytes_copied', ctypes.c_uint32), - ('has_data_loss', ctypes.c_uint32), -] - - -# values for enumeration 'kfd_criu_op' -kfd_criu_op__enumvalues = { - 0: 'KFD_CRIU_OP_PROCESS_INFO', - 1: 'KFD_CRIU_OP_CHECKPOINT', - 2: 'KFD_CRIU_OP_UNPAUSE', - 3: 'KFD_CRIU_OP_RESTORE', - 4: 'KFD_CRIU_OP_RESUME', -} -KFD_CRIU_OP_PROCESS_INFO = 0 -KFD_CRIU_OP_CHECKPOINT = 1 -KFD_CRIU_OP_UNPAUSE = 2 -KFD_CRIU_OP_RESTORE = 3 -KFD_CRIU_OP_RESUME = 4 -kfd_criu_op = ctypes.c_uint32 # enum -class struct_kfd_ioctl_criu_args(Structure): - pass - -struct_kfd_ioctl_criu_args._pack_ = 1 # source:False -struct_kfd_ioctl_criu_args._fields_ = [ - ('devices', ctypes.c_uint64), - ('bos', ctypes.c_uint64), - ('priv_data', ctypes.c_uint64), - ('priv_data_size', ctypes.c_uint64), - ('num_devices', ctypes.c_uint32), - ('num_bos', ctypes.c_uint32), - ('num_objects', ctypes.c_uint32), - ('pid', ctypes.c_uint32), - ('op', ctypes.c_uint32), - ('PADDING_0', ctypes.c_ubyte * 4), -] - -class struct_kfd_criu_device_bucket(Structure): - pass - -struct_kfd_criu_device_bucket._pack_ = 1 # source:False -struct_kfd_criu_device_bucket._fields_ = [ - ('user_gpu_id', ctypes.c_uint32), - ('actual_gpu_id', ctypes.c_uint32), - ('drm_fd', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - -class struct_kfd_criu_bo_bucket(Structure): - pass - -struct_kfd_criu_bo_bucket._pack_ = 1 # source:False -struct_kfd_criu_bo_bucket._fields_ = [ - ('addr', ctypes.c_uint64), - ('size', ctypes.c_uint64), - ('offset', ctypes.c_uint64), - ('restored_offset', ctypes.c_uint64), - ('gpu_id', ctypes.c_uint32), - ('alloc_flags', ctypes.c_uint32), - ('dmabuf_fd', ctypes.c_uint32), - ('pad', ctypes.c_uint32), -] - - -# values for enumeration 'kfd_mmio_remap' -kfd_mmio_remap__enumvalues = { - 0: 'KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL', - 4: 'KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL', -} -KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0 -KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4 -kfd_mmio_remap = ctypes.c_uint32 # enum -class struct_kfd_ioctl_ipc_export_handle_args(Structure): - pass - -struct_kfd_ioctl_ipc_export_handle_args._pack_ = 1 # source:False -struct_kfd_ioctl_ipc_export_handle_args._fields_ = [ - ('handle', ctypes.c_uint64), - ('share_handle', ctypes.c_uint32 * 4), - ('gpu_id', ctypes.c_uint32), - ('flags', ctypes.c_uint32), -] - -class struct_kfd_ioctl_ipc_import_handle_args(Structure): - pass - -struct_kfd_ioctl_ipc_import_handle_args._pack_ = 1 # source:False -struct_kfd_ioctl_ipc_import_handle_args._fields_ = [ - ('handle', ctypes.c_uint64), - ('va_addr', ctypes.c_uint64), - ('mmap_offset', ctypes.c_uint64), - ('share_handle', ctypes.c_uint32 * 4), - ('gpu_id', ctypes.c_uint32), - ('flags', ctypes.c_uint32), -] - -class struct_kfd_memory_range(Structure): - pass - -struct_kfd_memory_range._pack_ = 1 # source:False -struct_kfd_memory_range._fields_ = [ - ('va_addr', ctypes.c_uint64), - ('size', ctypes.c_uint64), -] - -class struct_kfd_ioctl_cross_memory_copy_args(Structure): - pass - -struct_kfd_ioctl_cross_memory_copy_args._pack_ = 1 # source:False -struct_kfd_ioctl_cross_memory_copy_args._fields_ = [ - ('pid', ctypes.c_uint32), - ('flags', ctypes.c_uint32), - ('src_mem_range_array', ctypes.c_uint64), - ('src_mem_array_size', ctypes.c_uint64), - ('dst_mem_range_array', ctypes.c_uint64), - ('dst_mem_array_size', ctypes.c_uint64), - ('bytes_copied', ctypes.c_uint64), -] - - -# values for enumeration 'kfd_ioctl_svm_op' -kfd_ioctl_svm_op__enumvalues = { - 0: 'KFD_IOCTL_SVM_OP_SET_ATTR', - 1: 'KFD_IOCTL_SVM_OP_GET_ATTR', -} -KFD_IOCTL_SVM_OP_SET_ATTR = 0 -KFD_IOCTL_SVM_OP_GET_ATTR = 1 -kfd_ioctl_svm_op = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_ioctl_svm_location' -kfd_ioctl_svm_location__enumvalues = { - 0: 'KFD_IOCTL_SVM_LOCATION_SYSMEM', - 4294967295: 'KFD_IOCTL_SVM_LOCATION_UNDEFINED', -} -KFD_IOCTL_SVM_LOCATION_SYSMEM = 0 -KFD_IOCTL_SVM_LOCATION_UNDEFINED = 4294967295 -kfd_ioctl_svm_location = ctypes.c_uint32 # enum - -# values for enumeration 'kfd_ioctl_svm_attr_type' -kfd_ioctl_svm_attr_type__enumvalues = { - 0: 'KFD_IOCTL_SVM_ATTR_PREFERRED_LOC', - 1: 'KFD_IOCTL_SVM_ATTR_PREFETCH_LOC', - 2: 'KFD_IOCTL_SVM_ATTR_ACCESS', - 3: 'KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE', - 4: 'KFD_IOCTL_SVM_ATTR_NO_ACCESS', - 5: 'KFD_IOCTL_SVM_ATTR_SET_FLAGS', - 6: 'KFD_IOCTL_SVM_ATTR_CLR_FLAGS', - 7: 'KFD_IOCTL_SVM_ATTR_GRANULARITY', -} -KFD_IOCTL_SVM_ATTR_PREFERRED_LOC = 0 -KFD_IOCTL_SVM_ATTR_PREFETCH_LOC = 1 -KFD_IOCTL_SVM_ATTR_ACCESS = 2 -KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE = 3 -KFD_IOCTL_SVM_ATTR_NO_ACCESS = 4 -KFD_IOCTL_SVM_ATTR_SET_FLAGS = 5 -KFD_IOCTL_SVM_ATTR_CLR_FLAGS = 6 -KFD_IOCTL_SVM_ATTR_GRANULARITY = 7 -kfd_ioctl_svm_attr_type = ctypes.c_uint32 # enum -class struct_kfd_ioctl_svm_attribute(Structure): - pass - -struct_kfd_ioctl_svm_attribute._pack_ = 1 # source:False -struct_kfd_ioctl_svm_attribute._fields_ = [ - ('type', ctypes.c_uint32), - ('value', ctypes.c_uint32), -] - -class struct_kfd_ioctl_svm_args(Structure): - pass - -struct_kfd_ioctl_svm_args._pack_ = 1 # source:False -struct_kfd_ioctl_svm_args._fields_ = [ - ('start_addr', ctypes.c_uint64), - ('size', ctypes.c_uint64), - ('op', ctypes.c_uint32), - ('nattr', ctypes.c_uint32), - ('attrs', struct_kfd_ioctl_svm_attribute * 0), -] - -class struct_kfd_ioctl_set_xnack_mode_args(Structure): - pass - -struct_kfd_ioctl_set_xnack_mode_args._pack_ = 1 # source:False -struct_kfd_ioctl_set_xnack_mode_args._fields_ = [ - ('xnack_enabled', ctypes.c_int32), -] - -__all__ = \ - ['DEBUG_RUNTIME_STATE_DISABLED', 'DEBUG_RUNTIME_STATE_ENABLED', - 'DEBUG_RUNTIME_STATE_ENABLED_BUSY', - 'DEBUG_RUNTIME_STATE_ENABLED_ERROR', 'EC_DEVICE_FATAL_HALT', - 'EC_DEVICE_MEMORY_VIOLATION', 'EC_DEVICE_NEW', - 'EC_DEVICE_QUEUE_DELETE', 'EC_DEVICE_RAS_ERROR', 'EC_MAX', - 'EC_NONE', 'EC_PROCESS_DEVICE_REMOVE', 'EC_PROCESS_RUNTIME', - 'EC_QUEUE_NEW', 'EC_QUEUE_PACKET_DISPATCH_CODE_INVALID', - 'EC_QUEUE_PACKET_DISPATCH_DIM_INVALID', - 'EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID', - 'EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID', - 'EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID', - 'EC_QUEUE_PACKET_RESERVED', 'EC_QUEUE_PACKET_UNSUPPORTED', - 'EC_QUEUE_PACKET_VENDOR_UNSUPPORTED', 'EC_QUEUE_PREEMPTION_ERROR', - 'EC_QUEUE_WAVE_ABORT', 'EC_QUEUE_WAVE_APERTURE_VIOLATION', - 'EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION', 'EC_QUEUE_WAVE_MATH_ERROR', - 'EC_QUEUE_WAVE_MEMORY_VIOLATION', 'EC_QUEUE_WAVE_TRAP', - 'KFD_CRIU_OP_CHECKPOINT', 'KFD_CRIU_OP_PROCESS_INFO', - 'KFD_CRIU_OP_RESTORE', 'KFD_CRIU_OP_RESUME', - 'KFD_CRIU_OP_UNPAUSE', 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL', - 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC', - 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD', - 'KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ', - 'KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP', - 'KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH', - 'KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION', - 'KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO', - 'KFD_DBG_TRAP_MASK_FP_INEXACT', - 'KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL', - 'KFD_DBG_TRAP_MASK_FP_INVALID', 'KFD_DBG_TRAP_MASK_FP_OVERFLOW', - 'KFD_DBG_TRAP_MASK_FP_UNDERFLOW', - 'KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO', - 'KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END', - 'KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START', - 'KFD_DBG_TRAP_OVERRIDE_OR', 'KFD_DBG_TRAP_OVERRIDE_REPLACE', - 'KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG', - 'KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT', - 'KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL', - 'KFD_IOCTL_SPM_OP_ACQUIRE', 'KFD_IOCTL_SPM_OP_RELEASE', - 'KFD_IOCTL_SPM_OP_SET_DEST_BUF', 'KFD_IOCTL_SVM_ATTR_ACCESS', - 'KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE', - 'KFD_IOCTL_SVM_ATTR_CLR_FLAGS', 'KFD_IOCTL_SVM_ATTR_GRANULARITY', - 'KFD_IOCTL_SVM_ATTR_NO_ACCESS', - 'KFD_IOCTL_SVM_ATTR_PREFERRED_LOC', - 'KFD_IOCTL_SVM_ATTR_PREFETCH_LOC', 'KFD_IOCTL_SVM_ATTR_SET_FLAGS', - 'KFD_IOCTL_SVM_LOCATION_SYSMEM', - 'KFD_IOCTL_SVM_LOCATION_UNDEFINED', 'KFD_IOCTL_SVM_OP_GET_ATTR', - 'KFD_IOCTL_SVM_OP_SET_ATTR', - 'KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH', - 'KFD_IOC_DBG_TRAP_DISABLE', 'KFD_IOC_DBG_TRAP_ENABLE', - 'KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT', - 'KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT', - 'KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT', - 'KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO', - 'KFD_IOC_DBG_TRAP_RESUME_QUEUES', - 'KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT', - 'KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED', - 'KFD_IOC_DBG_TRAP_SET_FLAGS', - 'KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH', - 'KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE', - 'KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE', - 'KFD_IOC_DBG_TRAP_SUSPEND_QUEUES', - 'KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL', - 'KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL', - 'KFD_SMI_EVENT_GPU_POST_RESET', 'KFD_SMI_EVENT_GPU_PRE_RESET', - 'KFD_SMI_EVENT_NONE', 'KFD_SMI_EVENT_THERMAL_THROTTLE', - 'KFD_SMI_EVENT_VMFAULT', 'kfd_criu_op', 'kfd_dbg_runtime_state', - 'kfd_dbg_trap_address_watch_mode', 'kfd_dbg_trap_exception_code', - 'kfd_dbg_trap_flags', 'kfd_dbg_trap_mask', - 'kfd_dbg_trap_operations', 'kfd_dbg_trap_override_mode', - 'kfd_dbg_trap_wave_launch_mode', 'kfd_ioctl_spm_op', - 'kfd_ioctl_svm_attr_type', 'kfd_ioctl_svm_location', - 'kfd_ioctl_svm_op', 'kfd_mmio_remap', 'kfd_smi_event', - 'struct_kfd_context_save_area_header', - 'struct_kfd_context_save_area_header_wave_state', - 'struct_kfd_criu_bo_bucket', 'struct_kfd_criu_device_bucket', - 'struct_kfd_dbg_device_info_entry', 'struct_kfd_event_data', - 'struct_kfd_hsa_hw_exception_data', - 'struct_kfd_hsa_memory_exception_data', - 'struct_kfd_hsa_signal_event_data', - 'struct_kfd_ioctl_acquire_vm_args', - 'struct_kfd_ioctl_alloc_memory_of_gpu_args', - 'struct_kfd_ioctl_alloc_queue_gws_args', - 'struct_kfd_ioctl_create_event_args', - 'struct_kfd_ioctl_create_queue_args', - 'struct_kfd_ioctl_criu_args', - 'struct_kfd_ioctl_cross_memory_copy_args', - 'struct_kfd_ioctl_dbg_address_watch_args', - 'struct_kfd_ioctl_dbg_register_args', - 'struct_kfd_ioctl_dbg_trap_args', - 'struct_kfd_ioctl_dbg_trap_clear_node_address_watch_args', - 'struct_kfd_ioctl_dbg_trap_device_snapshot_args', - 'struct_kfd_ioctl_dbg_trap_enable_args', - 'struct_kfd_ioctl_dbg_trap_query_debug_event_args', - 'struct_kfd_ioctl_dbg_trap_query_exception_info_args', - 'struct_kfd_ioctl_dbg_trap_queue_snapshot_args', - 'struct_kfd_ioctl_dbg_trap_resume_queues_args', - 'struct_kfd_ioctl_dbg_trap_send_runtime_event_args', - 'struct_kfd_ioctl_dbg_trap_set_exceptions_enabled_args', - 'struct_kfd_ioctl_dbg_trap_set_flags_args', - 'struct_kfd_ioctl_dbg_trap_set_node_address_watch_args', - 'struct_kfd_ioctl_dbg_trap_set_wave_launch_mode_args', - 'struct_kfd_ioctl_dbg_trap_set_wave_launch_override_args', - 'struct_kfd_ioctl_dbg_trap_suspend_queues_args', - 'struct_kfd_ioctl_dbg_unregister_args', - 'struct_kfd_ioctl_dbg_wave_control_args', - 'struct_kfd_ioctl_destroy_event_args', - 'struct_kfd_ioctl_destroy_queue_args', - 'struct_kfd_ioctl_export_dmabuf_args', - 'struct_kfd_ioctl_free_memory_of_gpu_args', - 'struct_kfd_ioctl_get_available_memory_args', - 'struct_kfd_ioctl_get_clock_counters_args', - 'struct_kfd_ioctl_get_dmabuf_info_args', - 'struct_kfd_ioctl_get_process_apertures_args', - 'struct_kfd_ioctl_get_process_apertures_new_args', - 'struct_kfd_ioctl_get_queue_wave_state_args', - 'struct_kfd_ioctl_get_tile_config_args', - 'struct_kfd_ioctl_get_version_args', - 'struct_kfd_ioctl_import_dmabuf_args', - 'struct_kfd_ioctl_ipc_export_handle_args', - 'struct_kfd_ioctl_ipc_import_handle_args', - 'struct_kfd_ioctl_map_memory_to_gpu_args', - 'struct_kfd_ioctl_reset_event_args', - 'struct_kfd_ioctl_runtime_enable_args', - 'struct_kfd_ioctl_set_cu_mask_args', - 'struct_kfd_ioctl_set_event_args', - 'struct_kfd_ioctl_set_memory_policy_args', - 'struct_kfd_ioctl_set_scratch_backing_va_args', - 'struct_kfd_ioctl_set_trap_handler_args', - 'struct_kfd_ioctl_set_xnack_mode_args', - 'struct_kfd_ioctl_smi_events_args', 'struct_kfd_ioctl_spm_args', - 'struct_kfd_ioctl_svm_args', 'struct_kfd_ioctl_svm_attribute', - 'struct_kfd_ioctl_unmap_memory_from_gpu_args', - 'struct_kfd_ioctl_update_queue_args', - 'struct_kfd_ioctl_wait_events_args', - 'struct_kfd_memory_exception_failure', 'struct_kfd_memory_range', - 'struct_kfd_process_device_apertures', - 'struct_kfd_queue_snapshot_entry', 'struct_kfd_runtime_info', - 'union_kfd_event_data_0', 'union_kfd_ioctl_dbg_trap_args_0'] diff --git a/extra/hip_gpu_driver/sdma_registers.h b/extra/hip_gpu_driver/sdma_registers.h new file mode 100644 index 0000000000..254744f6a8 --- /dev/null +++ b/extra/hip_gpu_driver/sdma_registers.h @@ -0,0 +1,571 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_CORE_INC_SDMA_REGISTERS_H_ +#define HSA_RUNTIME_CORE_INC_SDMA_REGISTERS_H_ + +#include +#include + +namespace rocr { +namespace AMD { + +// SDMA packet for VI device. +// Reference: http://people.freedesktop.org/~agd5f/dma_packets.txt + +const unsigned int SDMA_OP_COPY = 1; +const unsigned int SDMA_OP_FENCE = 5; +const unsigned int SDMA_OP_TRAP = 6; +const unsigned int SDMA_OP_POLL_REGMEM = 8; +const unsigned int SDMA_OP_ATOMIC = 10; +const unsigned int SDMA_OP_CONST_FILL = 11; +const unsigned int SDMA_OP_TIMESTAMP = 13; +const unsigned int SDMA_OP_GCR = 17; +const unsigned int SDMA_SUBOP_COPY_LINEAR = 0; +const unsigned int SDMA_SUBOP_COPY_LINEAR_RECT = 4; +const unsigned int SDMA_SUBOP_TIMESTAMP_GET_GLOBAL = 2; +const unsigned int SDMA_SUBOP_USER_GCR = 1; +const unsigned int SDMA_ATOMIC_ADD64 = 47; + +typedef struct SDMA_PKT_COPY_LINEAR_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int extra_info : 16; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int count : 22; + unsigned int reserved_0 : 10; + }; + unsigned int DW_1_DATA; + } COUNT_UNION; + + union { + struct { + unsigned int reserved_0 : 16; + unsigned int dst_swap : 2; + unsigned int reserved_1 : 6; + unsigned int src_swap : 2; + unsigned int reserved_2 : 6; + }; + unsigned int DW_2_DATA; + } PARAMETER_UNION; + + union { + struct { + unsigned int src_addr_31_0 : 32; + }; + unsigned int DW_3_DATA; + } SRC_ADDR_LO_UNION; + + union { + struct { + unsigned int src_addr_63_32 : 32; + }; + unsigned int DW_4_DATA; + } SRC_ADDR_HI_UNION; + + union { + struct { + unsigned int dst_addr_31_0 : 32; + }; + unsigned int DW_5_DATA; + } DST_ADDR_LO_UNION; + + union { + struct { + unsigned int dst_addr_63_32 : 32; + }; + unsigned int DW_6_DATA; + } DST_ADDR_HI_UNION; + + static const size_t kMaxSize_ = 0x3fffe0; +} SDMA_PKT_COPY_LINEAR; + +// linear sub-window +typedef struct SDMA_PKT_COPY_LINEAR_RECT_TAG { + static const unsigned int pitch_bits = 19; + static const unsigned int slice_bits = 28; + static const unsigned int rect_xy_bits = 14; + static const unsigned int rect_z_bits = 11; + + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int reserved : 13; + unsigned int element : 3; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int src_addr_31_0 : 32; + }; + unsigned int DW_1_DATA; + } SRC_ADDR_LO_UNION; + + union { + struct { + unsigned int src_addr_63_32 : 32; + }; + unsigned int DW_2_DATA; + } SRC_ADDR_HI_UNION; + + union { + struct { + unsigned int src_offset_x : 14; + unsigned int reserved_1 : 2; + unsigned int src_offset_y : 14; + unsigned int reserved_2 : 2; + }; + unsigned int DW_3_DATA; + } SRC_PARAMETER_1_UNION; + + union { + struct { + unsigned int src_offset_z : 11; + unsigned int reserved_1 : 2; + unsigned int src_pitch : pitch_bits; + }; + unsigned int DW_4_DATA; + } SRC_PARAMETER_2_UNION; + + union { + struct { + unsigned int src_slice_pitch : slice_bits; + unsigned int reserved_1 : 4; + }; + unsigned int DW_5_DATA; + } SRC_PARAMETER_3_UNION; + + union { + struct { + unsigned int dst_addr_31_0 : 32; + }; + unsigned int DW_6_DATA; + } DST_ADDR_LO_UNION; + + union { + struct { + unsigned int dst_addr_63_32 : 32; + }; + unsigned int DW_7_DATA; + } DST_ADDR_HI_UNION; + + union { + struct { + unsigned int dst_offset_x : 14; + unsigned int reserved_1 : 2; + unsigned int dst_offset_y : 14; + unsigned int reserved_2 : 2; + }; + unsigned int DW_8_DATA; + } DST_PARAMETER_1_UNION; + + union { + struct { + unsigned int dst_offset_z : 11; + unsigned int reserved_1 : 2; + unsigned int dst_pitch : pitch_bits; + }; + unsigned int DW_9_DATA; + } DST_PARAMETER_2_UNION; + + union { + struct { + unsigned int dst_slice_pitch : slice_bits; + unsigned int reserved_1 : 4; + }; + unsigned int DW_10_DATA; + } DST_PARAMETER_3_UNION; + + union { + struct { + unsigned int rect_x : rect_xy_bits; + unsigned int reserved_1 : 2; + unsigned int rect_y : rect_xy_bits; + unsigned int reserved_2 : 2; + }; + unsigned int DW_11_DATA; + } RECT_PARAMETER_1_UNION; + + union { + struct { + unsigned int rect_z : rect_z_bits; + unsigned int reserved_1 : 5; + unsigned int dst_swap : 2; + unsigned int reserved_2 : 6; + unsigned int src_swap : 2; + unsigned int reserved_3 : 6; + }; + unsigned int DW_12_DATA; + } RECT_PARAMETER_2_UNION; + +} SDMA_PKT_COPY_LINEAR_RECT; + +typedef struct SDMA_PKT_CONSTANT_FILL_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int sw : 2; + unsigned int reserved_0 : 12; + unsigned int fillsize : 2; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int dst_addr_31_0 : 32; + }; + unsigned int DW_1_DATA; + } DST_ADDR_LO_UNION; + + union { + struct { + unsigned int dst_addr_63_32 : 32; + }; + unsigned int DW_2_DATA; + } DST_ADDR_HI_UNION; + + union { + struct { + unsigned int src_data_31_0 : 32; + }; + unsigned int DW_3_DATA; + } DATA_UNION; + + union { + struct { + unsigned int count : 22; + unsigned int reserved_0 : 10; + }; + unsigned int DW_4_DATA; + } COUNT_UNION; + + static const size_t kMaxSize_ = 0x3fffe0; +} SDMA_PKT_CONSTANT_FILL; + +typedef struct SDMA_PKT_FENCE_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int mtype : 3; + unsigned int gcc : 1; + unsigned int sys : 1; + unsigned int pad1 : 1; + unsigned int snp : 1; + unsigned int gpa : 1; + unsigned int l2_policy : 2; + unsigned int reserved_0 : 6; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int addr_31_0 : 32; + }; + unsigned int DW_1_DATA; + } ADDR_LO_UNION; + + union { + struct { + unsigned int addr_63_32 : 32; + }; + unsigned int DW_2_DATA; + } ADDR_HI_UNION; + + union { + struct { + unsigned int data : 32; + }; + unsigned int DW_3_DATA; + } DATA_UNION; +} SDMA_PKT_FENCE; + +typedef struct SDMA_PKT_POLL_REGMEM_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int reserved_0 : 10; + unsigned int hdp_flush : 1; + unsigned int reserved_1 : 1; + unsigned int func : 3; + unsigned int mem_poll : 1; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int addr_31_0 : 32; + }; + unsigned int DW_1_DATA; + } ADDR_LO_UNION; + + union { + struct { + unsigned int addr_63_32 : 32; + }; + unsigned int DW_2_DATA; + } ADDR_HI_UNION; + + union { + struct { + unsigned int value : 32; + }; + unsigned int DW_3_DATA; + } VALUE_UNION; + + union { + struct { + unsigned int mask : 32; + }; + unsigned int DW_4_DATA; + } MASK_UNION; + + union { + struct { + unsigned int interval : 16; + unsigned int retry_count : 12; + unsigned int reserved_0 : 4; + }; + unsigned int DW_5_DATA; + } DW5_UNION; +} SDMA_PKT_POLL_REGMEM; + +typedef struct SDMA_PKT_ATOMIC_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int l : 1; + unsigned int reserved_0 : 8; + unsigned int operation : 7; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int addr_31_0 : 32; + }; + unsigned int DW_1_DATA; + } ADDR_LO_UNION; + + union { + struct { + unsigned int addr_63_32 : 32; + }; + unsigned int DW_2_DATA; + } ADDR_HI_UNION; + + union { + struct { + unsigned int src_data_31_0 : 32; + }; + unsigned int DW_3_DATA; + } SRC_DATA_LO_UNION; + + union { + struct { + unsigned int src_data_63_32 : 32; + }; + unsigned int DW_4_DATA; + } SRC_DATA_HI_UNION; + + union { + struct { + unsigned int cmp_data_31_0 : 32; + }; + unsigned int DW_5_DATA; + } CMP_DATA_LO_UNION; + + union { + struct { + unsigned int cmp_data_63_32 : 32; + }; + unsigned int DW_6_DATA; + } CMP_DATA_HI_UNION; + + union { + struct { + unsigned int loop_interval : 13; + unsigned int reserved_0 : 19; + }; + unsigned int DW_7_DATA; + } LOOP_UNION; +} SDMA_PKT_ATOMIC; + +typedef struct SDMA_PKT_TIMESTAMP_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int reserved_0 : 16; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int addr_31_0 : 32; + }; + unsigned int DW_1_DATA; + } ADDR_LO_UNION; + + union { + struct { + unsigned int addr_63_32 : 32; + }; + unsigned int DW_2_DATA; + } ADDR_HI_UNION; + +} SDMA_PKT_TIMESTAMP; + +typedef struct SDMA_PKT_TRAP_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int reserved_0 : 16; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int int_ctx : 28; + unsigned int reserved_1 : 4; + }; + unsigned int DW_1_DATA; + } INT_CONTEXT_UNION; +} SDMA_PKT_TRAP; + +// HDP flush packet, no parameters. +typedef struct SDMA_PKT_HDP_FLUSH_TAG { + unsigned int DW_0_DATA; + unsigned int DW_1_DATA; + unsigned int DW_2_DATA; + unsigned int DW_3_DATA; + unsigned int DW_4_DATA; + unsigned int DW_5_DATA; + + // Version of gfx9 sDMA microcode introducing SDMA_PKT_HDP_FLUSH + static const uint16_t kMinVersion_ = 0x1A5; +} SDMA_PKT_HDP_FLUSH; +static const SDMA_PKT_HDP_FLUSH hdp_flush_cmd = {0x8, 0x0, 0x80000000, 0x0, 0x0, 0x0}; + +typedef struct SDMA_PKT_GCR_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int : 16; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int : 7; + unsigned int BaseVA_LO : 25; + }; + unsigned int DW_1_DATA; + } WORD1_UNION; + + union { + struct { + unsigned int BaseVA_HI : 16; + unsigned int GCR_CONTROL_GLI_INV : 2; + unsigned int GCR_CONTROL_GL1_RANGE : 2; + unsigned int GCR_CONTROL_GLM_WB : 1; + unsigned int GCR_CONTROL_GLM_INV : 1; + unsigned int GCR_CONTROL_GLK_WB : 1; + unsigned int GCR_CONTROL_GLK_INV : 1; + unsigned int GCR_CONTROL_GLV_INV : 1; + unsigned int GCR_CONTROL_GL1_INV : 1; + unsigned int GCR_CONTROL_GL2_US : 1; + unsigned int GCR_CONTROL_GL2_RANGE : 2; + unsigned int GCR_CONTROL_GL2_DISCARD : 1; + unsigned int GCR_CONTROL_GL2_INV : 1; + unsigned int GCR_CONTROL_GL2_WB : 1; + }; + unsigned int DW_2_DATA; + } WORD2_UNION; + + union { + struct { + unsigned int GCR_CONTROL_RANGE_IS_PA : 1; + unsigned int GCR_CONTROL_SEQ : 2; + unsigned int : 4; + unsigned int LimitVA_LO : 25; + }; + unsigned int DW_3_DATA; + } WORD3_UNION; + + union { + struct { + unsigned int LimitVA_HI : 16; + unsigned int : 8; + unsigned int VMID : 4; + unsigned int : 4; + }; + unsigned int DW_4_DATA; + } WORD4_UNION; +} SDMA_PKT_GCR; + +} // namespace amd +} // namespace rocr + +#endif // HSA_RUNTIME_CORE_INC_SDMA_REGISTERS_H_ diff --git a/extra/hip_gpu_driver/test_kfd_2.py b/extra/hip_gpu_driver/test_kfd_2.py new file mode 100644 index 0000000000..caa424477a --- /dev/null +++ b/extra/hip_gpu_driver/test_kfd_2.py @@ -0,0 +1,207 @@ +import os, ctypes, pathlib, re, fcntl, functools, mmap, time +import tinygrad.runtime.autogen.kfd as kfd +from tinygrad.helpers import to_mv, getenv +from extra.hip_gpu_driver import hip_ioctl +import tinygrad.runtime.autogen.hsa as hsa +from hexdump import hexdump + +libc = ctypes.CDLL("libc.so.6") +libc.memset.argtypes = [ctypes.c_void_p, ctypes.c_char, ctypes.c_int] +libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long] +libc.mmap.restype = ctypes.c_void_p +MAP_NORESERVE = 0x4000 +MAP_FIXED = 0x10 + +def kfd_ioctl(idir, nr, user_struct, fd, **kwargs): + made = user_struct(**kwargs) + ret = fcntl.ioctl(fd, (idir<<30) | (ctypes.sizeof(user_struct)<<16) | (ord('K')<<8) | nr, made) + if ret != 0: raise RuntimeError(f"ioctl returned {ret}") + return made + +def format_struct(s): + sdats = [] + for field_name, field_type in s._fields_: + dat = getattr(s, field_name) + if isinstance(dat, int): sdats.append(f"{field_name}:0x{dat:X}") + else: sdats.append(f"{field_name}:{dat}") + return sdats + +idirs = {"IOW": 1, "IOR": 2, "IOWR": 3} +def ioctls_from_header(): + hdr = pathlib.Path("/usr/include/linux/kfd_ioctl.h").read_text().replace("\\\n", "") + pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_(IOW?R?)\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)' + matches = re.findall(pattern, hdr, re.MULTILINE) + + fxns = {} + for name, idir, nr, sname in matches: + fxns[name.replace("AMDKFD_IOC_", "").lower()] = functools.partial(kfd_ioctl, idirs[idir], int(nr, 0x10), getattr(kfd, "struct_"+sname)) + return type("KIO", (object, ), fxns) +kio = ioctls_from_header() + +# sudo su -c "echo 'file drivers/gpu/drm/amd/* +p' > /sys/kernel/debug/dynamic_debug/control" + +def gpu_alloc_userptr(fd, size, flags): + addr = libc.mmap(0, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS, -1, 0) + assert addr != 0xffffffffffffffff + mem = kio.alloc_memory_of_gpu(fd, va_addr=addr, size=size, gpu_id=GPU_ID, flags=flags, mmap_offset=addr) + return mem + +def gpu_alloc(fd, size, flags): + addr = libc.mmap(0, size, 0, mmap.MAP_PRIVATE|mmap.MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) + assert addr != 0xffffffffffffffff + mem = kio.alloc_memory_of_gpu(fd, va_addr=addr, size=size, gpu_id=GPU_ID, flags=flags) + buf = libc.mmap(mem.va_addr, mem.size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|MAP_FIXED, drm_fd, mem.mmap_offset) + assert buf != 0xffffffffffffffff + assert addr == buf == mem.va_addr + return mem + +if __name__ == "__main__": + fd = os.open("/dev/kfd", os.O_RDWR) + gpu_num = getenv("GPU", 0) + drm_fd = os.open(f"/dev/dri/renderD{128+gpu_num}", os.O_RDWR) + with open(f"/sys/devices/virtual/kfd/kfd/topology/nodes/{1+gpu_num}/gpu_id", "r") as f: GPU_ID = int(f.read()) + + #ver = kio.get_version(fd) + st = kio.acquire_vm(fd, drm_fd=drm_fd, gpu_id=GPU_ID) + #exit(0) + + # 0xF0000001 = KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE + # 0xD6000002 = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE + # 0xD6000004 = KFD_IOC_ALLOC_MEM_FLAGS_USERPTR | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE + # 0x94000010 = KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE + #addr = libc.mmap(0, 0x1000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_PRIVATE|mmap.MAP_ANONYMOUS, -1, 0) + #addr = libc.mmap(0, 0x1000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS, -1, 0) + #mem = kio.AMDKFD_IOC_ALLOC_MEMORY_OF_GPU(fd, va_addr=addr, size=0x1000, gpu_id=GPU_ID, flags=0xD6000004) + + #mem = gpu_alloc(fd, 0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM | + # kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | + # kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + #arr = (ctypes.c_int32 * 1)(GPU_ID) + #stm = kio.map_memory_to_gpu(fd, handle=mem.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + + arr = (ctypes.c_int32 * 1)(GPU_ID) + rw_ptr = gpu_alloc(fd, 0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + stm = kio.map_memory_to_gpu(fd, handle=rw_ptr.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + event_page = gpu_alloc(fd, 0x8000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + stm = kio.map_memory_to_gpu(fd, handle=event_page.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + ring_base = gpu_alloc_userptr(fd, 0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + stm = kio.map_memory_to_gpu(fd, handle=ring_base.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + signals = gpu_alloc_userptr(fd, 0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + stm = kio.map_memory_to_gpu(fd, handle=signals.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + eop_buffer = gpu_alloc(fd, 0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + stm = kio.map_memory_to_gpu(fd, handle=eop_buffer.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + ctx_save_restore_address = gpu_alloc(fd, 0x2C02000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | + kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE) + stm = kio.map_memory_to_gpu(fd, handle=ctx_save_restore_address.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + + #113.00 ms + 0.00 ms : 0 = AMDKFD_IOC_CREATE_QUEUE ring_base_address:0x797465200000 write_pointer_address:0x79751C068038 read_pointer_address:0x79751C068080 doorbell_offset:0x0 ring_size:0x800000 gpu_id:0x433D queue_type:0x2 queue_per + #centage:0x64 queue_priority:0x7 queue_id:0x0 eop_buffer_address:0x79751C064000 eop_buffer_size:0x1000 ctx_save_restore_address:0x796E52400000 ctx_save_restore_size:0x2BEA000 ctl_stack_size:0xA000 + + #113.84 ms + 0.59 ms : 0 = AMDKFD_IOC_CREATE_QUEUE ring_base_address:0x71AC3F600000 write_pointer_address:0x71B302AB0038 read_pointer_address:0x71B302AB0080 doorbell_offset:0xD0CF400000000008 ring_size:0x800000 gpu_id:0x433D queue_typ + #e:0x2 queue_percentage:0x64 queue_priority:0x7 queue_id:0x1 eop_buffer_address:0x71B302AAC000 eop_buffer_size:0x1000 ctx_save_restore_address:0x71AC3C800000 ctx_save_restore_size:0x2BEA000 ctl_stack_size:0xA000 + + #define KFD_MMAP_TYPE_SHIFT 62 + #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) + evt = kio.create_event(fd, event_page_offset=event_page.handle, auto_reset=1) + + nq = kio.create_queue(fd, ring_base_address=ring_base.va_addr, ring_size=0x1000, gpu_id=GPU_ID, + queue_type=kfd.KFD_IOC_QUEUE_TYPE_COMPUTE_AQL, queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE, + queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY, + eop_buffer_address=eop_buffer.va_addr, eop_buffer_size=0x1000, + ctx_save_restore_address=ctx_save_restore_address.va_addr, ctx_save_restore_size=0x2C02000, + ctl_stack_size = 0xa000, + # write_pointer_address and read_pointer_address are on GART + #write_pointer_address=0xaaaabbbb, read_pointer_address=0xaaaacccc) + write_pointer_address=rw_ptr.va_addr+0, read_pointer_address=rw_ptr.va_addr+0x8) + doorbell = libc.mmap(0, 8192, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, fd, nq.doorbell_offset) + print("doorbell", hex(doorbell)) + + to_mv(signals.va_addr, 0x40) + + """ + hexdump(to_mv(event_page.va_addr, 0x40)) + kio.set_event(fd, event_id=evt.event_id) + hexdump(to_mv(event_page.va_addr, 0x40)) + kio.reset_event(fd, event_id=evt.event_id) + hexdump(to_mv(event_page.va_addr, 0x40)) + """ + + # KFD_EVENT_TYPE_SIGNAL + + BARRIER_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER + BARRIER_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE + BARRIER_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE + BARRIER_HEADER |= hsa.HSA_PACKET_TYPE_BARRIER_AND << hsa.HSA_PACKET_HEADER_TYPE + + AQL_PACKET_SIZE = ctypes.sizeof(hsa.hsa_kernel_dispatch_packet_t) + EMPTY_SIGNAL = hsa.hsa_signal_t() + + ds = to_mv(rw_ptr.va_addr, 0x100).cast("Q") + ds[0] = 1 #ring_base.va_addr + AQL_PACKET_SIZE + ds[1] = 0 #ring_base.va_addr + #libc.memset(rw_ptr.va_addr, 0xaa, 0x100) + #hexdump(to_mv(rw_ptr.va_addr, 0x100)) + + #packet = hsa.hsa_barrier_and_packet_t.from_address(rw_ptr.va_addr+0x38) + packet = hsa.hsa_barrier_and_packet_t.from_address(ring_base.va_addr) + packet.reserved0 = 0 + packet.reserved1 = 0 + for i in range(5): packet.dep_signal[i] = EMPTY_SIGNAL + #packet.dep_signal[0] = hsa.hsa_signal_t(evt.event_id) + packet.reserved2 = 0 + #packet.completion_signal = EMPTY_SIGNAL + packet.completion_signal = hsa.hsa_signal_t(signals.va_addr) + packet.header = BARRIER_HEADER + hexdump(to_mv(ring_base.va_addr, AQL_PACKET_SIZE)) + + # _HsaEventData + to_mv(signals.va_addr, 0x40).cast("Q")[0] = 1 + to_mv(signals.va_addr, 0x40).cast("Q")[1] = 1 + #to_mv(signals.va_addr, 0x40).cast("Q")[2] = event_page + to_mv(signals.va_addr, 0x40).cast("Q")[2] = event_page.va_addr + evt.event_slot_index*8 # HWData2=HWAddress + to_mv(signals.va_addr, 0x40).cast("Q")[3] = evt.event_trigger_data # HWData3=HWData + print(hex(ds[0]), hex(ds[1]), hex(ds[2])) + hexdump(to_mv(signals.va_addr, 0x40)) + + # 10 08 49 3E 46 77 00 00 + + + # ring doorbell + print(hex(to_mv(doorbell, 0x10).cast("I")[0])) + #to_mv(doorbell, 0x10).cast("I")[0] = 0xffffffff + to_mv(doorbell, 0x10).cast("I")[0] = 0 + + evt_arr = (kfd.struct_kfd_event_data * 1)() + evt_arr[0].event_id = evt.event_id + kio.wait_events(fd, events_ptr=ctypes.addressof(evt_arr), num_events=1, wait_for_all=0, timeout=1000) + + print(hex(ds[0]), hex(ds[1]), hex(ds[2])) + hexdump(to_mv(signals.va_addr, 0x40)) + + #nq = kio.create_queue(fd, ring_base_address=buf, ring_size=0x1000, gpu_id=GPU_ID, + # queue_type=kfd.KFD_IOC_QUEUE_TYPE_COMPUTE_AQL, queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE, + # queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY, write_pointer_address=buf+8, read_pointer_address=buf+0x10) + #print(nq) + + #mv = to_mv(buf, 0x1000) + #addr = libc.mmap(0, 0x1000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_PRIVATE|mmap.MAP_ANONYMOUS, -1, 0) + + #print('\n'.join(format_struct(ver))) + #print('\n'.join(format_struct(st))) diff --git a/tinygrad/runtime/autogen/amd_sdma.py b/tinygrad/runtime/autogen/amd_sdma.py new file mode 100644 index 0000000000..42a13a28f5 --- /dev/null +++ b/tinygrad/runtime/autogen/amd_sdma.py @@ -0,0 +1,1430 @@ +# mypy: ignore-errors +# -*- coding: utf-8 -*- +# +# TARGET arch is: ['-I/opt/rocm/include', '-x', 'c++'] +# WORD_SIZE is: 8 +# POINTER_SIZE is: 8 +# LONGDOUBLE_SIZE is: 16 +# +import ctypes + + +class AsDictMixin: + @classmethod + def as_dict(cls, self): + result = {} + if not isinstance(self, AsDictMixin): + # not a structure, assume it's already a python object + return self + if not hasattr(cls, "_fields_"): + return result + # sys.version_info >= (3, 5) + # for (field, *_) in cls._fields_: # noqa + for field_tuple in cls._fields_: # noqa + field = field_tuple[0] + if field.startswith('PADDING_'): + continue + value = getattr(self, field) + type_ = type(value) + if hasattr(value, "_length_") and hasattr(value, "_type_"): + # array + if not hasattr(type_, "as_dict"): + value = [v for v in value] + else: + type_ = type_._type_ + value = [type_.as_dict(v) for v in value] + elif hasattr(value, "contents") and hasattr(value, "_type_"): + # pointer + try: + if not hasattr(type_, "as_dict"): + value = value.contents + else: + type_ = type_._type_ + value = type_.as_dict(value.contents) + except ValueError: + # nullptr + value = None + elif isinstance(value, AsDictMixin): + # other structure + value = type_.as_dict(value) + result[field] = value + return result + + +class Structure(ctypes.Structure, AsDictMixin): + + def __init__(self, *args, **kwds): + # We don't want to use positional arguments fill PADDING_* fields + + args = dict(zip(self.__class__._field_names_(), args)) + args.update(kwds) + super(Structure, self).__init__(**args) + + @classmethod + def _field_names_(cls): + if hasattr(cls, '_fields_'): + return (f[0] for f in cls._fields_ if not f[0].startswith('PADDING')) + else: + return () + + @classmethod + def get_type(cls, field): + for f in cls._fields_: + if f[0] == field: + return f[1] + return None + + @classmethod + def bind(cls, bound_fields): + fields = {} + for name, type_ in cls._fields_: + if hasattr(type_, "restype"): + if name in bound_fields: + if bound_fields[name] is None: + fields[name] = type_() + else: + # use a closure to capture the callback from the loop scope + fields[name] = ( + type_((lambda callback: lambda *args: callback(*args))( + bound_fields[name])) + ) + del bound_fields[name] + else: + # default callback implementation (does nothing) + try: + default_ = type_(0).restype().value + except TypeError: + default_ = None + fields[name] = type_(( + lambda default_: lambda *args: default_)(default_)) + else: + # not a callback function, use default initialization + if name in bound_fields: + fields[name] = bound_fields[name] + del bound_fields[name] + else: + fields[name] = type_() + if len(bound_fields) != 0: + raise ValueError( + "Cannot bind the following unknown callback(s) {}.{}".format( + cls.__name__, bound_fields.keys() + )) + return cls(**fields) + + +class Union(ctypes.Union, AsDictMixin): + pass + + + + + +SDMA_OP_COPY = 1 # Variable ctypes.c_uint32 +SDMA_OP_FENCE = 5 # Variable ctypes.c_uint32 +SDMA_OP_TRAP = 6 # Variable ctypes.c_uint32 +SDMA_OP_POLL_REGMEM = 8 # Variable ctypes.c_uint32 +SDMA_OP_ATOMIC = 10 # Variable ctypes.c_uint32 +SDMA_OP_CONST_FILL = 11 # Variable ctypes.c_uint32 +SDMA_OP_TIMESTAMP = 13 # Variable ctypes.c_uint32 +SDMA_OP_GCR = 17 # Variable ctypes.c_uint32 +SDMA_SUBOP_COPY_LINEAR = 0 # Variable ctypes.c_uint32 +SDMA_SUBOP_COPY_LINEAR_RECT = 4 # Variable ctypes.c_uint32 +SDMA_SUBOP_TIMESTAMP_GET_GLOBAL = 2 # Variable ctypes.c_uint32 +SDMA_SUBOP_USER_GCR = 1 # Variable ctypes.c_uint32 +SDMA_ATOMIC_ADD64 = 47 # Variable ctypes.c_uint32 +class struct_SDMA_PKT_COPY_LINEAR_TAG(Structure): + pass + +class union_SDMA_PKT_COPY_LINEAR_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('extra_info', ctypes.c_uint32, 16), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_TAG_COUNT_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_1_0._fields_ = [ + ('count', ctypes.c_uint32, 22), + ('reserved_0', ctypes.c_uint32, 10), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_COUNT_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_COUNT_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_COUNT_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_TAG_PARAMETER_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_2_0._fields_ = [ + ('reserved_0', ctypes.c_uint32, 16), + ('dst_swap', ctypes.c_uint32, 2), + ('reserved_1', ctypes.c_uint32, 6), + ('src_swap', ctypes.c_uint32, 2), + ('reserved_2', ctypes.c_uint32, 6), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_PARAMETER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_PARAMETER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_PARAMETER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_3_0._fields_ = [ + ('src_addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_4_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_4_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_4_0._fields_ = [ + ('src_addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_4_0), + ('DW_4_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_5_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_5_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_5_0._fields_ = [ + ('dst_addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_5_0), + ('DW_5_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_TAG_6_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_TAG_6_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG_6_0._fields_ = [ + ('dst_addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_TAG_6_0), + ('DW_6_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_COPY_LINEAR_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_HEADER_UNION), + ('COUNT_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_COUNT_UNION), + ('PARAMETER_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_PARAMETER_UNION), + ('SRC_ADDR_LO_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_LO_UNION), + ('SRC_ADDR_HI_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_HI_UNION), + ('DST_ADDR_LO_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_LO_UNION), + ('DST_ADDR_HI_UNION', union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_HI_UNION), +] + +SDMA_PKT_COPY_LINEAR = struct_SDMA_PKT_COPY_LINEAR_TAG +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG(Structure): + pass + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('reserved', ctypes.c_uint32, 13), + ('element', ctypes.c_uint32, 3), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_1_0._fields_ = [ + ('src_addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_2_0._fields_ = [ + ('src_addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_1_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_3_0._fields_ = [ + ('src_offset_x', ctypes.c_uint32, 14), + ('reserved_1', ctypes.c_uint32, 2), + ('src_offset_y', ctypes.c_uint32, 14), + ('reserved_2', ctypes.c_uint32, 2), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_1_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_1_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_1_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_2_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_4_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_4_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_4_0._fields_ = [ + ('src_offset_z', ctypes.c_uint32, 11), + ('reserved_1', ctypes.c_uint32, 2), + ('src_pitch', ctypes.c_uint32, 19), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_2_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_2_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_2_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_4_0), + ('DW_4_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_3_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_5_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_5_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_5_0._fields_ = [ + ('src_slice_pitch', ctypes.c_uint32, 28), + ('reserved_1', ctypes.c_uint32, 4), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_3_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_3_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_3_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_5_0), + ('DW_5_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_6_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_6_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_6_0._fields_ = [ + ('dst_addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_6_0), + ('DW_6_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_7_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_7_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_7_0._fields_ = [ + ('dst_addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_7_0), + ('DW_7_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_1_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_8_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_8_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_8_0._fields_ = [ + ('dst_offset_x', ctypes.c_uint32, 14), + ('reserved_1', ctypes.c_uint32, 2), + ('dst_offset_y', ctypes.c_uint32, 14), + ('reserved_2', ctypes.c_uint32, 2), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_1_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_1_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_1_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_8_0), + ('DW_8_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_2_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_9_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_9_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_9_0._fields_ = [ + ('dst_offset_z', ctypes.c_uint32, 11), + ('reserved_1', ctypes.c_uint32, 2), + ('dst_pitch', ctypes.c_uint32, 19), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_2_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_2_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_2_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_9_0), + ('DW_9_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_3_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_10_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_10_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_10_0._fields_ = [ + ('dst_slice_pitch', ctypes.c_uint32, 28), + ('reserved_1', ctypes.c_uint32, 4), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_3_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_3_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_3_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_10_0), + ('DW_10_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_1_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_11_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_11_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_11_0._fields_ = [ + ('rect_x', ctypes.c_uint32, 14), + ('reserved_1', ctypes.c_uint32, 2), + ('rect_y', ctypes.c_uint32, 14), + ('reserved_2', ctypes.c_uint32, 2), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_1_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_1_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_1_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_11_0), + ('DW_11_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_2_UNION(Union): + pass + +class struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_12_0(Structure): + pass + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_12_0._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_12_0._fields_ = [ + ('rect_z', ctypes.c_uint32, 11), + ('reserved_1', ctypes.c_uint32, 5), + ('dst_swap', ctypes.c_uint32, 2), + ('reserved_2', ctypes.c_uint32, 6), + ('src_swap', ctypes.c_uint32, 2), + ('reserved_3', ctypes.c_uint32, 6), +] + +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_2_UNION._pack_ = 1 # source:False +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_2_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_2_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_12_0), + ('DW_12_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_COPY_LINEAR_RECT_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_HEADER_UNION), + ('SRC_ADDR_LO_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_LO_UNION), + ('SRC_ADDR_HI_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_HI_UNION), + ('SRC_PARAMETER_1_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_1_UNION), + ('SRC_PARAMETER_2_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_2_UNION), + ('SRC_PARAMETER_3_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_3_UNION), + ('DST_ADDR_LO_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_LO_UNION), + ('DST_ADDR_HI_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_HI_UNION), + ('DST_PARAMETER_1_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_1_UNION), + ('DST_PARAMETER_2_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_2_UNION), + ('DST_PARAMETER_3_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_3_UNION), + ('RECT_PARAMETER_1_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_1_UNION), + ('RECT_PARAMETER_2_UNION', union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_2_UNION), +] + +SDMA_PKT_COPY_LINEAR_RECT = struct_SDMA_PKT_COPY_LINEAR_RECT_TAG +class struct_SDMA_PKT_CONSTANT_FILL_TAG(Structure): + pass + +class union_SDMA_PKT_CONSTANT_FILL_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_CONSTANT_FILL_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_CONSTANT_FILL_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_CONSTANT_FILL_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('sw', ctypes.c_uint32, 2), + ('reserved_0', ctypes.c_uint32, 12), + ('fillsize', ctypes.c_uint32, 2), +] + +union_SDMA_PKT_CONSTANT_FILL_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_CONSTANT_FILL_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_CONSTANT_FILL_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_CONSTANT_FILL_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_CONSTANT_FILL_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_CONSTANT_FILL_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_CONSTANT_FILL_TAG_1_0._fields_ = [ + ('dst_addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_CONSTANT_FILL_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_CONSTANT_FILL_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_CONSTANT_FILL_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_CONSTANT_FILL_TAG_2_0._fields_ = [ + ('dst_addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_CONSTANT_FILL_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_CONSTANT_FILL_TAG_DATA_UNION(Union): + pass + +class struct_SDMA_PKT_CONSTANT_FILL_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_CONSTANT_FILL_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_CONSTANT_FILL_TAG_3_0._fields_ = [ + ('src_data_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_CONSTANT_FILL_TAG_DATA_UNION._pack_ = 1 # source:False +union_SDMA_PKT_CONSTANT_FILL_TAG_DATA_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_CONSTANT_FILL_TAG_DATA_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_CONSTANT_FILL_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_CONSTANT_FILL_TAG_COUNT_UNION(Union): + pass + +class struct_SDMA_PKT_CONSTANT_FILL_TAG_4_0(Structure): + pass + +struct_SDMA_PKT_CONSTANT_FILL_TAG_4_0._pack_ = 1 # source:False +struct_SDMA_PKT_CONSTANT_FILL_TAG_4_0._fields_ = [ + ('count', ctypes.c_uint32, 22), + ('reserved_0', ctypes.c_uint32, 10), +] + +union_SDMA_PKT_CONSTANT_FILL_TAG_COUNT_UNION._pack_ = 1 # source:False +union_SDMA_PKT_CONSTANT_FILL_TAG_COUNT_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_CONSTANT_FILL_TAG_COUNT_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_CONSTANT_FILL_TAG_4_0), + ('DW_4_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_CONSTANT_FILL_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_CONSTANT_FILL_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_CONSTANT_FILL_TAG_HEADER_UNION), + ('DST_ADDR_LO_UNION', union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_LO_UNION), + ('DST_ADDR_HI_UNION', union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_HI_UNION), + ('DATA_UNION', union_SDMA_PKT_CONSTANT_FILL_TAG_DATA_UNION), + ('COUNT_UNION', union_SDMA_PKT_CONSTANT_FILL_TAG_COUNT_UNION), +] + +SDMA_PKT_CONSTANT_FILL = struct_SDMA_PKT_CONSTANT_FILL_TAG +class struct_SDMA_PKT_FENCE_TAG(Structure): + pass + +class union_SDMA_PKT_FENCE_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_FENCE_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_FENCE_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_FENCE_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('mtype', ctypes.c_uint32, 3), + ('gcc', ctypes.c_uint32, 1), + ('sys', ctypes.c_uint32, 1), + ('pad1', ctypes.c_uint32, 1), + ('snp', ctypes.c_uint32, 1), + ('gpa', ctypes.c_uint32, 1), + ('l2_policy', ctypes.c_uint32, 2), + ('reserved_0', ctypes.c_uint32, 6), +] + +union_SDMA_PKT_FENCE_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_FENCE_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_FENCE_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_FENCE_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_FENCE_TAG_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_FENCE_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_FENCE_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_FENCE_TAG_1_0._fields_ = [ + ('addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_FENCE_TAG_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_FENCE_TAG_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_FENCE_TAG_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_FENCE_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_FENCE_TAG_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_FENCE_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_FENCE_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_FENCE_TAG_2_0._fields_ = [ + ('addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_FENCE_TAG_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_FENCE_TAG_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_FENCE_TAG_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_FENCE_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_FENCE_TAG_DATA_UNION(Union): + pass + +class struct_SDMA_PKT_FENCE_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_FENCE_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_FENCE_TAG_3_0._fields_ = [ + ('data', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_FENCE_TAG_DATA_UNION._pack_ = 1 # source:False +union_SDMA_PKT_FENCE_TAG_DATA_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_FENCE_TAG_DATA_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_FENCE_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_FENCE_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_FENCE_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_FENCE_TAG_HEADER_UNION), + ('ADDR_LO_UNION', union_SDMA_PKT_FENCE_TAG_ADDR_LO_UNION), + ('ADDR_HI_UNION', union_SDMA_PKT_FENCE_TAG_ADDR_HI_UNION), + ('DATA_UNION', union_SDMA_PKT_FENCE_TAG_DATA_UNION), +] + +SDMA_PKT_FENCE = struct_SDMA_PKT_FENCE_TAG +class struct_SDMA_PKT_POLL_REGMEM_TAG(Structure): + pass + +class union_SDMA_PKT_POLL_REGMEM_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_POLL_REGMEM_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_POLL_REGMEM_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('reserved_0', ctypes.c_uint32, 10), + ('hdp_flush', ctypes.c_uint32, 1), + ('reserved_1', ctypes.c_uint32, 1), + ('func', ctypes.c_uint32, 3), + ('mem_poll', ctypes.c_uint32, 1), +] + +union_SDMA_PKT_POLL_REGMEM_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_POLL_REGMEM_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_POLL_REGMEM_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_POLL_REGMEM_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_POLL_REGMEM_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_POLL_REGMEM_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG_1_0._fields_ = [ + ('addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_POLL_REGMEM_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_POLL_REGMEM_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_POLL_REGMEM_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG_2_0._fields_ = [ + ('addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_POLL_REGMEM_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_POLL_REGMEM_TAG_VALUE_UNION(Union): + pass + +class struct_SDMA_PKT_POLL_REGMEM_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_POLL_REGMEM_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG_3_0._fields_ = [ + ('value', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_POLL_REGMEM_TAG_VALUE_UNION._pack_ = 1 # source:False +union_SDMA_PKT_POLL_REGMEM_TAG_VALUE_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_POLL_REGMEM_TAG_VALUE_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_POLL_REGMEM_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_POLL_REGMEM_TAG_MASK_UNION(Union): + pass + +class struct_SDMA_PKT_POLL_REGMEM_TAG_4_0(Structure): + pass + +struct_SDMA_PKT_POLL_REGMEM_TAG_4_0._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG_4_0._fields_ = [ + ('mask', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_POLL_REGMEM_TAG_MASK_UNION._pack_ = 1 # source:False +union_SDMA_PKT_POLL_REGMEM_TAG_MASK_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_POLL_REGMEM_TAG_MASK_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_POLL_REGMEM_TAG_4_0), + ('DW_4_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_POLL_REGMEM_TAG_DW5_UNION(Union): + pass + +class struct_SDMA_PKT_POLL_REGMEM_TAG_5_0(Structure): + pass + +struct_SDMA_PKT_POLL_REGMEM_TAG_5_0._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG_5_0._fields_ = [ + ('interval', ctypes.c_uint32, 16), + ('retry_count', ctypes.c_uint32, 12), + ('reserved_0', ctypes.c_uint32, 4), +] + +union_SDMA_PKT_POLL_REGMEM_TAG_DW5_UNION._pack_ = 1 # source:False +union_SDMA_PKT_POLL_REGMEM_TAG_DW5_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_POLL_REGMEM_TAG_DW5_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_POLL_REGMEM_TAG_5_0), + ('DW_5_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_POLL_REGMEM_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_POLL_REGMEM_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_POLL_REGMEM_TAG_HEADER_UNION), + ('ADDR_LO_UNION', union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_LO_UNION), + ('ADDR_HI_UNION', union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_HI_UNION), + ('VALUE_UNION', union_SDMA_PKT_POLL_REGMEM_TAG_VALUE_UNION), + ('MASK_UNION', union_SDMA_PKT_POLL_REGMEM_TAG_MASK_UNION), + ('DW5_UNION', union_SDMA_PKT_POLL_REGMEM_TAG_DW5_UNION), +] + +SDMA_PKT_POLL_REGMEM = struct_SDMA_PKT_POLL_REGMEM_TAG +class struct_SDMA_PKT_ATOMIC_TAG(Structure): + pass + +class union_SDMA_PKT_ATOMIC_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('l', ctypes.c_uint32, 1), + ('reserved_0', ctypes.c_uint32, 8), + ('operation', ctypes.c_uint32, 7), +] + +union_SDMA_PKT_ATOMIC_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_1_0._fields_ = [ + ('addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_ATOMIC_TAG_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_2_0._fields_ = [ + ('addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_ATOMIC_TAG_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_LO_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_3_0._fields_ = [ + ('src_data_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_HI_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_4_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_4_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_4_0._fields_ = [ + ('src_data_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_4_0), + ('DW_4_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_LO_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_5_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_5_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_5_0._fields_ = [ + ('cmp_data_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_5_0), + ('DW_5_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_HI_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_6_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_6_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_6_0._fields_ = [ + ('cmp_data_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_6_0), + ('DW_6_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_ATOMIC_TAG_LOOP_UNION(Union): + pass + +class struct_SDMA_PKT_ATOMIC_TAG_7_0(Structure): + pass + +struct_SDMA_PKT_ATOMIC_TAG_7_0._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG_7_0._fields_ = [ + ('loop_interval', ctypes.c_uint32, 13), + ('reserved_0', ctypes.c_uint32, 19), +] + +union_SDMA_PKT_ATOMIC_TAG_LOOP_UNION._pack_ = 1 # source:False +union_SDMA_PKT_ATOMIC_TAG_LOOP_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_ATOMIC_TAG_LOOP_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_ATOMIC_TAG_7_0), + ('DW_7_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_ATOMIC_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_ATOMIC_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_ATOMIC_TAG_HEADER_UNION), + ('ADDR_LO_UNION', union_SDMA_PKT_ATOMIC_TAG_ADDR_LO_UNION), + ('ADDR_HI_UNION', union_SDMA_PKT_ATOMIC_TAG_ADDR_HI_UNION), + ('SRC_DATA_LO_UNION', union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_LO_UNION), + ('SRC_DATA_HI_UNION', union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_HI_UNION), + ('CMP_DATA_LO_UNION', union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_LO_UNION), + ('CMP_DATA_HI_UNION', union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_HI_UNION), + ('LOOP_UNION', union_SDMA_PKT_ATOMIC_TAG_LOOP_UNION), +] + +SDMA_PKT_ATOMIC = struct_SDMA_PKT_ATOMIC_TAG +class struct_SDMA_PKT_TIMESTAMP_TAG(Structure): + pass + +class union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_TIMESTAMP_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_TIMESTAMP_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_TIMESTAMP_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('reserved_0', ctypes.c_uint32, 16), +] + +union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_TIMESTAMP_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_TIMESTAMP_TAG_ADDR_LO_UNION(Union): + pass + +class struct_SDMA_PKT_TIMESTAMP_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_TIMESTAMP_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_TIMESTAMP_TAG_1_0._fields_ = [ + ('addr_31_0', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_TIMESTAMP_TAG_ADDR_LO_UNION._pack_ = 1 # source:False +union_SDMA_PKT_TIMESTAMP_TAG_ADDR_LO_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_TIMESTAMP_TAG_ADDR_LO_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_TIMESTAMP_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_TIMESTAMP_TAG_ADDR_HI_UNION(Union): + pass + +class struct_SDMA_PKT_TIMESTAMP_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_TIMESTAMP_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_TIMESTAMP_TAG_2_0._fields_ = [ + ('addr_63_32', ctypes.c_uint32, 32), +] + +union_SDMA_PKT_TIMESTAMP_TAG_ADDR_HI_UNION._pack_ = 1 # source:False +union_SDMA_PKT_TIMESTAMP_TAG_ADDR_HI_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_TIMESTAMP_TAG_ADDR_HI_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_TIMESTAMP_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_TIMESTAMP_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_TIMESTAMP_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION), + ('ADDR_LO_UNION', union_SDMA_PKT_TIMESTAMP_TAG_ADDR_LO_UNION), + ('ADDR_HI_UNION', union_SDMA_PKT_TIMESTAMP_TAG_ADDR_HI_UNION), +] + +SDMA_PKT_TIMESTAMP = struct_SDMA_PKT_TIMESTAMP_TAG +class struct_SDMA_PKT_TRAP_TAG(Structure): + pass + +class union_SDMA_PKT_TRAP_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_TRAP_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_TRAP_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_TRAP_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('reserved_0', ctypes.c_uint32, 16), +] + +union_SDMA_PKT_TRAP_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_TRAP_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_TRAP_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_TRAP_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION(Union): + pass + +class struct_SDMA_PKT_TRAP_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_TRAP_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_TRAP_TAG_1_0._fields_ = [ + ('int_ctx', ctypes.c_uint32, 28), + ('reserved_1', ctypes.c_uint32, 4), +] + +union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION._pack_ = 1 # source:False +union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_TRAP_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_TRAP_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_TRAP_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_TRAP_TAG_HEADER_UNION), + ('INT_CONTEXT_UNION', union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION), +] + +SDMA_PKT_TRAP = struct_SDMA_PKT_TRAP_TAG +class struct_SDMA_PKT_HDP_FLUSH_TAG(Structure): + pass + +struct_SDMA_PKT_HDP_FLUSH_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_HDP_FLUSH_TAG._fields_ = [ + ('DW_0_DATA', ctypes.c_uint32), + ('DW_1_DATA', ctypes.c_uint32), + ('DW_2_DATA', ctypes.c_uint32), + ('DW_3_DATA', ctypes.c_uint32), + ('DW_4_DATA', ctypes.c_uint32), + ('DW_5_DATA', ctypes.c_uint32), +] + +SDMA_PKT_HDP_FLUSH = struct_SDMA_PKT_HDP_FLUSH_TAG +hdp_flush_cmd = struct_SDMA_PKT_HDP_FLUSH_TAG # Variable struct_SDMA_PKT_HDP_FLUSH_TAG +class struct_SDMA_PKT_GCR_TAG(Structure): + pass + +class union_SDMA_PKT_GCR_TAG_HEADER_UNION(Union): + pass + +class struct_SDMA_PKT_GCR_TAG_0_0(Structure): + pass + +struct_SDMA_PKT_GCR_TAG_0_0._pack_ = 1 # source:False +struct_SDMA_PKT_GCR_TAG_0_0._fields_ = [ + ('op', ctypes.c_uint32, 8), + ('sub_op', ctypes.c_uint32, 8), + ('_2', ctypes.c_uint32, 16), +] + +union_SDMA_PKT_GCR_TAG_HEADER_UNION._pack_ = 1 # source:False +union_SDMA_PKT_GCR_TAG_HEADER_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_GCR_TAG_HEADER_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_GCR_TAG_0_0), + ('DW_0_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_GCR_TAG_WORD1_UNION(Union): + pass + +class struct_SDMA_PKT_GCR_TAG_1_0(Structure): + pass + +struct_SDMA_PKT_GCR_TAG_1_0._pack_ = 1 # source:False +struct_SDMA_PKT_GCR_TAG_1_0._fields_ = [ + ('_0', ctypes.c_uint32, 7), + ('BaseVA_LO', ctypes.c_uint32, 25), +] + +union_SDMA_PKT_GCR_TAG_WORD1_UNION._pack_ = 1 # source:False +union_SDMA_PKT_GCR_TAG_WORD1_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_GCR_TAG_WORD1_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_GCR_TAG_1_0), + ('DW_1_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_GCR_TAG_WORD2_UNION(Union): + pass + +class struct_SDMA_PKT_GCR_TAG_2_0(Structure): + pass + +struct_SDMA_PKT_GCR_TAG_2_0._pack_ = 1 # source:False +struct_SDMA_PKT_GCR_TAG_2_0._fields_ = [ + ('BaseVA_HI', ctypes.c_uint32, 16), + ('GCR_CONTROL_GLI_INV', ctypes.c_uint32, 2), + ('GCR_CONTROL_GL1_RANGE', ctypes.c_uint32, 2), + ('GCR_CONTROL_GLM_WB', ctypes.c_uint32, 1), + ('GCR_CONTROL_GLM_INV', ctypes.c_uint32, 1), + ('GCR_CONTROL_GLK_WB', ctypes.c_uint32, 1), + ('GCR_CONTROL_GLK_INV', ctypes.c_uint32, 1), + ('GCR_CONTROL_GLV_INV', ctypes.c_uint32, 1), + ('GCR_CONTROL_GL1_INV', ctypes.c_uint32, 1), + ('GCR_CONTROL_GL2_US', ctypes.c_uint32, 1), + ('GCR_CONTROL_GL2_RANGE', ctypes.c_uint32, 2), + ('GCR_CONTROL_GL2_DISCARD', ctypes.c_uint32, 1), + ('GCR_CONTROL_GL2_INV', ctypes.c_uint32, 1), + ('GCR_CONTROL_GL2_WB', ctypes.c_uint32, 1), +] + +union_SDMA_PKT_GCR_TAG_WORD2_UNION._pack_ = 1 # source:False +union_SDMA_PKT_GCR_TAG_WORD2_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_GCR_TAG_WORD2_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_GCR_TAG_2_0), + ('DW_2_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_GCR_TAG_WORD3_UNION(Union): + pass + +class struct_SDMA_PKT_GCR_TAG_3_0(Structure): + pass + +struct_SDMA_PKT_GCR_TAG_3_0._pack_ = 1 # source:False +struct_SDMA_PKT_GCR_TAG_3_0._fields_ = [ + ('GCR_CONTROL_RANGE_IS_PA', ctypes.c_uint32, 1), + ('GCR_CONTROL_SEQ', ctypes.c_uint32, 2), + ('_2', ctypes.c_uint32, 4), + ('LimitVA_LO', ctypes.c_uint32, 25), +] + +union_SDMA_PKT_GCR_TAG_WORD3_UNION._pack_ = 1 # source:False +union_SDMA_PKT_GCR_TAG_WORD3_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_GCR_TAG_WORD3_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_GCR_TAG_3_0), + ('DW_3_DATA', ctypes.c_uint32), +] + +class union_SDMA_PKT_GCR_TAG_WORD4_UNION(Union): + pass + +class struct_SDMA_PKT_GCR_TAG_4_0(Structure): + pass + +struct_SDMA_PKT_GCR_TAG_4_0._pack_ = 1 # source:False +struct_SDMA_PKT_GCR_TAG_4_0._fields_ = [ + ('LimitVA_HI', ctypes.c_uint32, 16), + ('_1', ctypes.c_uint32, 8), + ('VMID', ctypes.c_uint32, 4), + ('_3', ctypes.c_uint32, 4), +] + +union_SDMA_PKT_GCR_TAG_WORD4_UNION._pack_ = 1 # source:False +union_SDMA_PKT_GCR_TAG_WORD4_UNION._anonymous_ = ('_0',) +union_SDMA_PKT_GCR_TAG_WORD4_UNION._fields_ = [ + ('_0', struct_SDMA_PKT_GCR_TAG_4_0), + ('DW_4_DATA', ctypes.c_uint32), +] + +struct_SDMA_PKT_GCR_TAG._pack_ = 1 # source:False +struct_SDMA_PKT_GCR_TAG._fields_ = [ + ('HEADER_UNION', union_SDMA_PKT_GCR_TAG_HEADER_UNION), + ('WORD1_UNION', union_SDMA_PKT_GCR_TAG_WORD1_UNION), + ('WORD2_UNION', union_SDMA_PKT_GCR_TAG_WORD2_UNION), + ('WORD3_UNION', union_SDMA_PKT_GCR_TAG_WORD3_UNION), + ('WORD4_UNION', union_SDMA_PKT_GCR_TAG_WORD4_UNION), +] + +SDMA_PKT_GCR = struct_SDMA_PKT_GCR_TAG +__all__ = \ + ['SDMA_ATOMIC_ADD64', 'SDMA_OP_ATOMIC', 'SDMA_OP_CONST_FILL', + 'SDMA_OP_COPY', 'SDMA_OP_FENCE', 'SDMA_OP_GCR', + 'SDMA_OP_POLL_REGMEM', 'SDMA_OP_TIMESTAMP', 'SDMA_OP_TRAP', + 'SDMA_PKT_ATOMIC', 'SDMA_PKT_CONSTANT_FILL', + 'SDMA_PKT_COPY_LINEAR', 'SDMA_PKT_COPY_LINEAR_RECT', + 'SDMA_PKT_FENCE', 'SDMA_PKT_GCR', 'SDMA_PKT_HDP_FLUSH', + 'SDMA_PKT_POLL_REGMEM', 'SDMA_PKT_TIMESTAMP', 'SDMA_PKT_TRAP', + 'SDMA_SUBOP_COPY_LINEAR', 'SDMA_SUBOP_COPY_LINEAR_RECT', + 'SDMA_SUBOP_TIMESTAMP_GET_GLOBAL', 'SDMA_SUBOP_USER_GCR', + 'hdp_flush_cmd', 'struct_SDMA_PKT_ATOMIC_TAG', + 'struct_SDMA_PKT_ATOMIC_TAG_0_0', + 'struct_SDMA_PKT_ATOMIC_TAG_1_0', + 'struct_SDMA_PKT_ATOMIC_TAG_2_0', + 'struct_SDMA_PKT_ATOMIC_TAG_3_0', + 'struct_SDMA_PKT_ATOMIC_TAG_4_0', + 'struct_SDMA_PKT_ATOMIC_TAG_5_0', + 'struct_SDMA_PKT_ATOMIC_TAG_6_0', + 'struct_SDMA_PKT_ATOMIC_TAG_7_0', + 'struct_SDMA_PKT_CONSTANT_FILL_TAG', + 'struct_SDMA_PKT_CONSTANT_FILL_TAG_0_0', + 'struct_SDMA_PKT_CONSTANT_FILL_TAG_1_0', + 'struct_SDMA_PKT_CONSTANT_FILL_TAG_2_0', + 'struct_SDMA_PKT_CONSTANT_FILL_TAG_3_0', + 'struct_SDMA_PKT_CONSTANT_FILL_TAG_4_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_0_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_10_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_11_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_12_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_1_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_2_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_3_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_4_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_5_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_6_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_7_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_8_0', + 'struct_SDMA_PKT_COPY_LINEAR_RECT_TAG_9_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_0_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_1_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_2_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_3_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_4_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_5_0', + 'struct_SDMA_PKT_COPY_LINEAR_TAG_6_0', + 'struct_SDMA_PKT_FENCE_TAG', 'struct_SDMA_PKT_FENCE_TAG_0_0', + 'struct_SDMA_PKT_FENCE_TAG_1_0', 'struct_SDMA_PKT_FENCE_TAG_2_0', + 'struct_SDMA_PKT_FENCE_TAG_3_0', 'struct_SDMA_PKT_GCR_TAG', + 'struct_SDMA_PKT_GCR_TAG_0_0', 'struct_SDMA_PKT_GCR_TAG_1_0', + 'struct_SDMA_PKT_GCR_TAG_2_0', 'struct_SDMA_PKT_GCR_TAG_3_0', + 'struct_SDMA_PKT_GCR_TAG_4_0', 'struct_SDMA_PKT_HDP_FLUSH_TAG', + 'struct_SDMA_PKT_POLL_REGMEM_TAG', + 'struct_SDMA_PKT_POLL_REGMEM_TAG_0_0', + 'struct_SDMA_PKT_POLL_REGMEM_TAG_1_0', + 'struct_SDMA_PKT_POLL_REGMEM_TAG_2_0', + 'struct_SDMA_PKT_POLL_REGMEM_TAG_3_0', + 'struct_SDMA_PKT_POLL_REGMEM_TAG_4_0', + 'struct_SDMA_PKT_POLL_REGMEM_TAG_5_0', + 'struct_SDMA_PKT_TIMESTAMP_TAG', + 'struct_SDMA_PKT_TIMESTAMP_TAG_0_0', + 'struct_SDMA_PKT_TIMESTAMP_TAG_1_0', + 'struct_SDMA_PKT_TIMESTAMP_TAG_2_0', 'struct_SDMA_PKT_TRAP_TAG', + 'struct_SDMA_PKT_TRAP_TAG_0_0', 'struct_SDMA_PKT_TRAP_TAG_1_0', + 'union_SDMA_PKT_ATOMIC_TAG_ADDR_HI_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_ADDR_LO_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_HI_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_CMP_DATA_LO_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_HEADER_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_LOOP_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_HI_UNION', + 'union_SDMA_PKT_ATOMIC_TAG_SRC_DATA_LO_UNION', + 'union_SDMA_PKT_CONSTANT_FILL_TAG_COUNT_UNION', + 'union_SDMA_PKT_CONSTANT_FILL_TAG_DATA_UNION', + 'union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_HI_UNION', + 'union_SDMA_PKT_CONSTANT_FILL_TAG_DST_ADDR_LO_UNION', + 'union_SDMA_PKT_CONSTANT_FILL_TAG_HEADER_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_HI_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_ADDR_LO_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_1_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_2_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_DST_PARAMETER_3_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_HEADER_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_1_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_RECT_PARAMETER_2_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_HI_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_ADDR_LO_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_1_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_2_UNION', + 'union_SDMA_PKT_COPY_LINEAR_RECT_TAG_SRC_PARAMETER_3_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_COUNT_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_HI_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_DST_ADDR_LO_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_HEADER_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_PARAMETER_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_HI_UNION', + 'union_SDMA_PKT_COPY_LINEAR_TAG_SRC_ADDR_LO_UNION', + 'union_SDMA_PKT_FENCE_TAG_ADDR_HI_UNION', + 'union_SDMA_PKT_FENCE_TAG_ADDR_LO_UNION', + 'union_SDMA_PKT_FENCE_TAG_DATA_UNION', + 'union_SDMA_PKT_FENCE_TAG_HEADER_UNION', + 'union_SDMA_PKT_GCR_TAG_HEADER_UNION', + 'union_SDMA_PKT_GCR_TAG_WORD1_UNION', + 'union_SDMA_PKT_GCR_TAG_WORD2_UNION', + 'union_SDMA_PKT_GCR_TAG_WORD3_UNION', + 'union_SDMA_PKT_GCR_TAG_WORD4_UNION', + 'union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_HI_UNION', + 'union_SDMA_PKT_POLL_REGMEM_TAG_ADDR_LO_UNION', + 'union_SDMA_PKT_POLL_REGMEM_TAG_DW5_UNION', + 'union_SDMA_PKT_POLL_REGMEM_TAG_HEADER_UNION', + 'union_SDMA_PKT_POLL_REGMEM_TAG_MASK_UNION', + 'union_SDMA_PKT_POLL_REGMEM_TAG_VALUE_UNION', + 'union_SDMA_PKT_TIMESTAMP_TAG_ADDR_HI_UNION', + 'union_SDMA_PKT_TIMESTAMP_TAG_ADDR_LO_UNION', + 'union_SDMA_PKT_TIMESTAMP_TAG_HEADER_UNION', + 'union_SDMA_PKT_TRAP_TAG_HEADER_UNION', + 'union_SDMA_PKT_TRAP_TAG_INT_CONTEXT_UNION'] diff --git a/tinygrad/runtime/autogen/hsa.py b/tinygrad/runtime/autogen/hsa.py index 804e2de720..86a27530ba 100644 --- a/tinygrad/runtime/autogen/hsa.py +++ b/tinygrad/runtime/autogen/hsa.py @@ -3588,6 +3588,128 @@ try: hsa_amd_vmem_get_alloc_properties_from_handle.argtypes = [hsa_amd_vmem_alloc_handle_t, ctypes.POINTER(struct_hsa_amd_memory_pool_s), ctypes.POINTER(c__EA_hsa_amd_memory_type_t)] except AttributeError: pass +amd_queue_properties32_t = ctypes.c_uint32 + +# values for enumeration 'amd_queue_properties_t' +amd_queue_properties_t__enumvalues = { + 0: 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_SHIFT', + 1: 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_WIDTH', + 1: 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER', + 1: 'AMD_QUEUE_PROPERTIES_IS_PTR64_SHIFT', + 1: 'AMD_QUEUE_PROPERTIES_IS_PTR64_WIDTH', + 2: 'AMD_QUEUE_PROPERTIES_IS_PTR64', + 2: 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS_SHIFT', + 1: 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS_WIDTH', + 4: 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS', + 3: 'AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_SHIFT', + 1: 'AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_WIDTH', + 8: 'AMD_QUEUE_PROPERTIES_ENABLE_PROFILING', + 4: 'AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE_SHIFT', + 1: 'AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE_WIDTH', + 16: 'AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE', + 5: 'AMD_QUEUE_PROPERTIES_RESERVED1_SHIFT', + 27: 'AMD_QUEUE_PROPERTIES_RESERVED1_WIDTH', + -32: 'AMD_QUEUE_PROPERTIES_RESERVED1', +} +AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_SHIFT = 0 +AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_WIDTH = 1 +AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER = 1 +AMD_QUEUE_PROPERTIES_IS_PTR64_SHIFT = 1 +AMD_QUEUE_PROPERTIES_IS_PTR64_WIDTH = 1 +AMD_QUEUE_PROPERTIES_IS_PTR64 = 2 +AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS_SHIFT = 2 +AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS_WIDTH = 1 +AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS = 4 +AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_SHIFT = 3 +AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_WIDTH = 1 +AMD_QUEUE_PROPERTIES_ENABLE_PROFILING = 8 +AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE_SHIFT = 4 +AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE_WIDTH = 1 +AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE = 16 +AMD_QUEUE_PROPERTIES_RESERVED1_SHIFT = 5 +AMD_QUEUE_PROPERTIES_RESERVED1_WIDTH = 27 +AMD_QUEUE_PROPERTIES_RESERVED1 = -32 +amd_queue_properties_t = ctypes.c_int32 # enum +class struct_amd_queue_s(Structure): + pass + +struct_amd_queue_s._pack_ = 1 # source:False +struct_amd_queue_s._fields_ = [ + ('hsa_queue', hsa_queue_t), + ('reserved1', ctypes.c_uint32 * 4), + ('write_dispatch_id', ctypes.c_uint64), + ('group_segment_aperture_base_hi', ctypes.c_uint32), + ('private_segment_aperture_base_hi', ctypes.c_uint32), + ('max_cu_id', ctypes.c_uint32), + ('max_wave_id', ctypes.c_uint32), + ('max_legacy_doorbell_dispatch_id_plus_1', ctypes.c_uint64), + ('legacy_doorbell_lock', ctypes.c_uint32), + ('reserved2', ctypes.c_uint32 * 9), + ('read_dispatch_id', ctypes.c_uint64), + ('read_dispatch_id_field_base_byte_offset', ctypes.c_uint32), + ('compute_tmpring_size', ctypes.c_uint32), + ('scratch_resource_descriptor', ctypes.c_uint32 * 4), + ('scratch_backing_memory_location', ctypes.c_uint64), + ('scratch_backing_memory_byte_size', ctypes.c_uint64), + ('scratch_wave64_lane_byte_size', ctypes.c_uint32), + ('queue_properties', ctypes.c_uint32), + ('reserved3', ctypes.c_uint32 * 2), + ('queue_inactive_signal', hsa_signal_t), + ('reserved4', ctypes.c_uint32 * 14), +] + +amd_queue_t = struct_amd_queue_s +amd_signal_kind64_t = ctypes.c_int64 + +# values for enumeration 'amd_signal_kind_t' +amd_signal_kind_t__enumvalues = { + 0: 'AMD_SIGNAL_KIND_INVALID', + 1: 'AMD_SIGNAL_KIND_USER', + -1: 'AMD_SIGNAL_KIND_DOORBELL', + -2: 'AMD_SIGNAL_KIND_LEGACY_DOORBELL', +} +AMD_SIGNAL_KIND_INVALID = 0 +AMD_SIGNAL_KIND_USER = 1 +AMD_SIGNAL_KIND_DOORBELL = -1 +AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2 +amd_signal_kind_t = ctypes.c_int32 # enum +class struct_amd_signal_s(Structure): + pass + +class union_amd_signal_s_0(Union): + pass + +union_amd_signal_s_0._pack_ = 1 # source:False +union_amd_signal_s_0._fields_ = [ + ('value', ctypes.c_int64), + ('legacy_hardware_doorbell_ptr', ctypes.POINTER(ctypes.c_uint32)), + ('hardware_doorbell_ptr', ctypes.POINTER(ctypes.c_uint64)), +] + +class union_amd_signal_s_1(Union): + pass + +union_amd_signal_s_1._pack_ = 1 # source:False +union_amd_signal_s_1._fields_ = [ + ('queue_ptr', ctypes.POINTER(struct_amd_queue_s)), + ('reserved2', ctypes.c_uint64), +] + +struct_amd_signal_s._pack_ = 1 # source:False +struct_amd_signal_s._anonymous_ = ('_0', '_1',) +struct_amd_signal_s._fields_ = [ + ('kind', ctypes.c_int64), + ('_0', union_amd_signal_s_0), + ('event_mailbox_ptr', ctypes.c_uint64), + ('event_id', ctypes.c_uint32), + ('reserved1', ctypes.c_uint32), + ('start_ts', ctypes.c_uint64), + ('end_ts', ctypes.c_uint64), + ('_1', union_amd_signal_s_1), + ('reserved3', ctypes.c_uint32 * 2), +] + +amd_signal_t = struct_amd_signal_s class struct_BrigModuleHeader(Structure): pass @@ -3713,7 +3835,27 @@ struct_hsa_ext_finalizer_1_00_pfn_s._fields_ = [ hsa_ext_finalizer_1_00_pfn_t = struct_hsa_ext_finalizer_1_00_pfn_s __all__ = \ - ['BrigModule_t', 'HSA_ACCESS_PERMISSION_NONE', + ['AMD_QUEUE_PROPERTIES_ENABLE_PROFILING', + 'AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_SHIFT', + 'AMD_QUEUE_PROPERTIES_ENABLE_PROFILING_WIDTH', + 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER', + 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS', + 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS_SHIFT', + 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS_WIDTH', + 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_SHIFT', + 'AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_WIDTH', + 'AMD_QUEUE_PROPERTIES_IS_PTR64', + 'AMD_QUEUE_PROPERTIES_IS_PTR64_SHIFT', + 'AMD_QUEUE_PROPERTIES_IS_PTR64_WIDTH', + 'AMD_QUEUE_PROPERTIES_RESERVED1', + 'AMD_QUEUE_PROPERTIES_RESERVED1_SHIFT', + 'AMD_QUEUE_PROPERTIES_RESERVED1_WIDTH', + 'AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE', + 'AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE_SHIFT', + 'AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE_WIDTH', + 'AMD_SIGNAL_KIND_DOORBELL', 'AMD_SIGNAL_KIND_INVALID', + 'AMD_SIGNAL_KIND_LEGACY_DOORBELL', 'AMD_SIGNAL_KIND_USER', + 'BrigModule_t', 'HSA_ACCESS_PERMISSION_NONE', 'HSA_ACCESS_PERMISSION_RO', 'HSA_ACCESS_PERMISSION_RW', 'HSA_ACCESS_PERMISSION_WO', 'HSA_AGENT_FEATURE_AGENT_DISPATCH', 'HSA_AGENT_FEATURE_KERNEL_DISPATCH', @@ -4082,8 +4224,10 @@ __all__ = \ 'HSA_VARIABLE_SEGMENT_READONLY', 'HSA_WAIT_STATE_ACTIVE', 'HSA_WAIT_STATE_BLOCKED', 'HSA_WAVEFRONT_INFO_SIZE', 'MEMORY_TYPE_NONE', 'MEMORY_TYPE_PINNED', - 'c__EA_hsa_access_permission_t', 'c__EA_hsa_agent_feature_t', - 'c__EA_hsa_agent_info_t', + 'amd_queue_properties32_t', 'amd_queue_properties_t', + 'amd_queue_t', 'amd_signal_kind64_t', 'amd_signal_kind_t', + 'amd_signal_t', 'c__EA_hsa_access_permission_t', + 'c__EA_hsa_agent_feature_t', 'c__EA_hsa_agent_info_t', 'c__EA_hsa_amd_agent_memory_pool_info_t', 'c__EA_hsa_amd_copy_direction_t', 'c__EA_hsa_amd_hw_exception_reset_cause_t', @@ -4422,6 +4566,7 @@ __all__ = \ 'hsa_wait_state_t__enumvalues', 'hsa_wavefront_get_info', 'hsa_wavefront_info_t', 'hsa_wavefront_info_t__enumvalues', 'hsa_wavefront_t', 'int32_t', 'size_t', 'struct_BrigModuleHeader', + 'struct_amd_queue_s', 'struct_amd_signal_s', 'struct_hsa_agent_dispatch_packet_s', 'struct_hsa_agent_s', 'struct_hsa_amd_barrier_value_packet_s', 'struct_hsa_amd_event_s', 'struct_hsa_amd_gpu_hw_exception_info_s', @@ -4455,4 +4600,5 @@ __all__ = \ 'struct_hsa_queue_s', 'struct_hsa_region_s', 'struct_hsa_signal_group_s', 'struct_hsa_signal_s', 'struct_hsa_wavefront_s', 'uint16_t', 'uint32_t', 'uint64_t', + 'union_amd_signal_s_0', 'union_amd_signal_s_1', 'union_hsa_amd_event_s_0'] diff --git a/tinygrad/runtime/autogen/kfd.py b/tinygrad/runtime/autogen/kfd.py new file mode 100644 index 0000000000..8772dc0751 --- /dev/null +++ b/tinygrad/runtime/autogen/kfd.py @@ -0,0 +1,812 @@ +# mypy: ignore-errors +# -*- coding: utf-8 -*- +# +# TARGET arch is: [] +# WORD_SIZE is: 8 +# POINTER_SIZE is: 8 +# LONGDOUBLE_SIZE is: 16 +# +import ctypes, os + + +class AsDictMixin: + @classmethod + def as_dict(cls, self): + result = {} + if not isinstance(self, AsDictMixin): + # not a structure, assume it's already a python object + return self + if not hasattr(cls, "_fields_"): + return result + # sys.version_info >= (3, 5) + # for (field, *_) in cls._fields_: # noqa + for field_tuple in cls._fields_: # noqa + field = field_tuple[0] + if field.startswith('PADDING_'): + continue + value = getattr(self, field) + type_ = type(value) + if hasattr(value, "_length_") and hasattr(value, "_type_"): + # array + if not hasattr(type_, "as_dict"): + value = [v for v in value] + else: + type_ = type_._type_ + value = [type_.as_dict(v) for v in value] + elif hasattr(value, "contents") and hasattr(value, "_type_"): + # pointer + try: + if not hasattr(type_, "as_dict"): + value = value.contents + else: + type_ = type_._type_ + value = type_.as_dict(value.contents) + except ValueError: + # nullptr + value = None + elif isinstance(value, AsDictMixin): + # other structure + value = type_.as_dict(value) + result[field] = value + return result + + +class Structure(ctypes.Structure, AsDictMixin): + + def __init__(self, *args, **kwds): + # We don't want to use positional arguments fill PADDING_* fields + + args = dict(zip(self.__class__._field_names_(), args)) + args.update(kwds) + super(Structure, self).__init__(**args) + + @classmethod + def _field_names_(cls): + if hasattr(cls, '_fields_'): + return (f[0] for f in cls._fields_ if not f[0].startswith('PADDING')) + else: + return () + + @classmethod + def get_type(cls, field): + for f in cls._fields_: + if f[0] == field: + return f[1] + return None + + @classmethod + def bind(cls, bound_fields): + fields = {} + for name, type_ in cls._fields_: + if hasattr(type_, "restype"): + if name in bound_fields: + if bound_fields[name] is None: + fields[name] = type_() + else: + # use a closure to capture the callback from the loop scope + fields[name] = ( + type_((lambda callback: lambda *args: callback(*args))( + bound_fields[name])) + ) + del bound_fields[name] + else: + # default callback implementation (does nothing) + try: + default_ = type_(0).restype().value + except TypeError: + default_ = None + fields[name] = type_(( + lambda default_: lambda *args: default_)(default_)) + else: + # not a callback function, use default initialization + if name in bound_fields: + fields[name] = bound_fields[name] + del bound_fields[name] + else: + fields[name] = type_() + if len(bound_fields) != 0: + raise ValueError( + "Cannot bind the following unknown callback(s) {}.{}".format( + cls.__name__, bound_fields.keys() + )) + return cls(**fields) + + +class Union(ctypes.Union, AsDictMixin): + pass + + + + + +KFD_IOCTL_H_INCLUDED = True # macro +KFD_IOCTL_MAJOR_VERSION = 1 # macro +KFD_IOCTL_MINOR_VERSION = 6 # macro +KFD_IOC_QUEUE_TYPE_COMPUTE = 0x0 # macro +KFD_IOC_QUEUE_TYPE_SDMA = 0x1 # macro +KFD_IOC_QUEUE_TYPE_COMPUTE_AQL = 0x2 # macro +KFD_IOC_QUEUE_TYPE_SDMA_XGMI = 0x3 # macro +KFD_MAX_QUEUE_PERCENTAGE = 100 # macro +KFD_MAX_QUEUE_PRIORITY = 15 # macro +KFD_IOC_CACHE_POLICY_COHERENT = 0 # macro +KFD_IOC_CACHE_POLICY_NONCOHERENT = 1 # macro +NUM_OF_SUPPORTED_GPUS = 7 # macro +MAX_ALLOWED_NUM_POINTS = 100 # macro +MAX_ALLOWED_AW_BUFF_SIZE = 4096 # macro +MAX_ALLOWED_WAC_BUFF_SIZE = 128 # macro +KFD_IOC_EVENT_SIGNAL = 0 # macro +KFD_IOC_EVENT_NODECHANGE = 1 # macro +KFD_IOC_EVENT_DEVICESTATECHANGE = 2 # macro +KFD_IOC_EVENT_HW_EXCEPTION = 3 # macro +KFD_IOC_EVENT_SYSTEM_EVENT = 4 # macro +KFD_IOC_EVENT_DEBUG_EVENT = 5 # macro +KFD_IOC_EVENT_PROFILE_EVENT = 6 # macro +KFD_IOC_EVENT_QUEUE_EVENT = 7 # macro +KFD_IOC_EVENT_MEMORY = 8 # macro +KFD_IOC_WAIT_RESULT_COMPLETE = 0 # macro +KFD_IOC_WAIT_RESULT_TIMEOUT = 1 # macro +KFD_IOC_WAIT_RESULT_FAIL = 2 # macro +KFD_SIGNAL_EVENT_LIMIT = 4096 # macro +KFD_HW_EXCEPTION_WHOLE_GPU_RESET = 0 # macro +KFD_HW_EXCEPTION_PER_ENGINE_RESET = 1 # macro +KFD_HW_EXCEPTION_GPU_HANG = 0 # macro +KFD_HW_EXCEPTION_ECC = 1 # macro +KFD_MEM_ERR_NO_RAS = 0 # macro +KFD_MEM_ERR_SRAM_ECC = 1 # macro +KFD_MEM_ERR_POISON_CONSUMED = 2 # macro +KFD_MEM_ERR_GPU_HANG = 3 # macro +KFD_IOC_ALLOC_MEM_FLAGS_VRAM = (1<<0) # macro +KFD_IOC_ALLOC_MEM_FLAGS_GTT = (1<<1) # macro +KFD_IOC_ALLOC_MEM_FLAGS_USERPTR = (1<<2) # macro +KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL = (1<<3) # macro +KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP = (1<<4) # macro +KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE = (1<<31) # macro +KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE = (1<<30) # macro +KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC = (1<<29) # macro +KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE = (1<<28) # macro +KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM = (1<<27) # macro +KFD_IOC_ALLOC_MEM_FLAGS_COHERENT = (1<<26) # macro +KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED = (1<<25) # macro +# def KFD_SMI_EVENT_MASK_FROM_INDEX(i): # macro +# return (1<<((i)-1)) +KFD_IOCTL_SVM_FLAG_HOST_ACCESS = 0x00000001 # macro +KFD_IOCTL_SVM_FLAG_COHERENT = 0x00000002 # macro +KFD_IOCTL_SVM_FLAG_HIVE_LOCAL = 0x00000004 # macro +KFD_IOCTL_SVM_FLAG_GPU_RO = 0x00000008 # macro +KFD_IOCTL_SVM_FLAG_GPU_EXEC = 0x00000010 # macro +KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY = 0x00000020 # macro +AMDKFD_IOCTL_BASE = 'K' # macro +# def AMDKFD_IO(nr): # macro +# return _IO('K',nr) +# def AMDKFD_IOR(nr, type): # macro +# return _IOR('K',nr,type) +# def AMDKFD_IOW(nr, type): # macro +# return _IOW('K',nr,type) +# def AMDKFD_IOWR(nr, type): # macro +# return _IOWR('K',nr,type) +# AMDKFD_IOC_GET_VERSION = _IOR('K',nr,type) ( 0x01 , struct kfd_ioctl_get_version_args ) # macro +# AMDKFD_IOC_CREATE_QUEUE = _IOWR('K',nr,type) ( 0x02 , struct kfd_ioctl_create_queue_args ) # macro +# AMDKFD_IOC_DESTROY_QUEUE = _IOWR('K',nr,type) ( 0x03 , struct kfd_ioctl_destroy_queue_args ) # macro +# AMDKFD_IOC_SET_MEMORY_POLICY = _IOW('K',nr,type) ( 0x04 , struct kfd_ioctl_set_memory_policy_args ) # macro +# AMDKFD_IOC_GET_CLOCK_COUNTERS = _IOWR('K',nr,type) ( 0x05 , struct kfd_ioctl_get_clock_counters_args ) # macro +# AMDKFD_IOC_GET_PROCESS_APERTURES = _IOR('K',nr,type) ( 0x06 , struct kfd_ioctl_get_process_apertures_args ) # macro +# AMDKFD_IOC_UPDATE_QUEUE = _IOW('K',nr,type) ( 0x07 , struct kfd_ioctl_update_queue_args ) # macro +# AMDKFD_IOC_CREATE_EVENT = _IOWR('K',nr,type) ( 0x08 , struct kfd_ioctl_create_event_args ) # macro +# AMDKFD_IOC_DESTROY_EVENT = _IOW('K',nr,type) ( 0x09 , struct kfd_ioctl_destroy_event_args ) # macro +# AMDKFD_IOC_SET_EVENT = _IOW('K',nr,type) ( 0x0A , struct kfd_ioctl_set_event_args ) # macro +# AMDKFD_IOC_RESET_EVENT = _IOW('K',nr,type) ( 0x0B , struct kfd_ioctl_reset_event_args ) # macro +# AMDKFD_IOC_WAIT_EVENTS = _IOWR('K',nr,type) ( 0x0C , struct kfd_ioctl_wait_events_args ) # macro +# AMDKFD_IOC_DBG_REGISTER = _IOW('K',nr,type) ( 0x0D , struct kfd_ioctl_dbg_register_args ) # macro +# AMDKFD_IOC_DBG_UNREGISTER = _IOW('K',nr,type) ( 0x0E , struct kfd_ioctl_dbg_unregister_args ) # macro +# AMDKFD_IOC_DBG_ADDRESS_WATCH = _IOW('K',nr,type) ( 0x0F , struct kfd_ioctl_dbg_address_watch_args ) # macro +# AMDKFD_IOC_DBG_WAVE_CONTROL = _IOW('K',nr,type) ( 0x10 , struct kfd_ioctl_dbg_wave_control_args ) # macro +# AMDKFD_IOC_SET_SCRATCH_BACKING_VA = _IOWR('K',nr,type) ( 0x11 , struct kfd_ioctl_set_scratch_backing_va_args ) # macro +# AMDKFD_IOC_GET_TILE_CONFIG = _IOWR('K',nr,type) ( 0x12 , struct kfd_ioctl_get_tile_config_args ) # macro +# AMDKFD_IOC_SET_TRAP_HANDLER = _IOW('K',nr,type) ( 0x13 , struct kfd_ioctl_set_trap_handler_args ) # macro +# AMDKFD_IOC_GET_PROCESS_APERTURES_NEW = _IOWR('K',nr,type) ( 0x14 , struct kfd_ioctl_get_process_apertures_new_args ) # macro +# AMDKFD_IOC_ACQUIRE_VM = _IOW('K',nr,type) ( 0x15 , struct kfd_ioctl_acquire_vm_args ) # macro +# AMDKFD_IOC_ALLOC_MEMORY_OF_GPU = _IOWR('K',nr,type) ( 0x16 , struct kfd_ioctl_alloc_memory_of_gpu_args ) # macro +# AMDKFD_IOC_FREE_MEMORY_OF_GPU = _IOW('K',nr,type) ( 0x17 , struct kfd_ioctl_free_memory_of_gpu_args ) # macro +# AMDKFD_IOC_MAP_MEMORY_TO_GPU = _IOWR('K',nr,type) ( 0x18 , struct kfd_ioctl_map_memory_to_gpu_args ) # macro +# AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU = _IOWR('K',nr,type) ( 0x19 , struct kfd_ioctl_unmap_memory_from_gpu_args ) # macro +# AMDKFD_IOC_SET_CU_MASK = _IOW('K',nr,type) ( 0x1A , struct kfd_ioctl_set_cu_mask_args ) # macro +# AMDKFD_IOC_GET_QUEUE_WAVE_STATE = _IOWR('K',nr,type) ( 0x1B , struct kfd_ioctl_get_queue_wave_state_args ) # macro +# AMDKFD_IOC_GET_DMABUF_INFO = _IOWR('K',nr,type) ( 0x1C , struct kfd_ioctl_get_dmabuf_info_args ) # macro +# AMDKFD_IOC_IMPORT_DMABUF = _IOWR('K',nr,type) ( 0x1D , struct kfd_ioctl_import_dmabuf_args ) # macro +# AMDKFD_IOC_ALLOC_QUEUE_GWS = _IOWR('K',nr,type) ( 0x1E , struct kfd_ioctl_alloc_queue_gws_args ) # macro +# AMDKFD_IOC_SMI_EVENTS = _IOWR('K',nr,type) ( 0x1F , struct kfd_ioctl_smi_events_args ) # macro +# AMDKFD_IOC_SVM = _IOWR('K',nr,type) ( 0x20 , struct kfd_ioctl_svm_args ) # macro +# AMDKFD_IOC_SET_XNACK_MODE = _IOWR('K',nr,type) ( 0x21 , struct kfd_ioctl_set_xnack_mode_args ) # macro +AMDKFD_COMMAND_START = 0x01 # macro +AMDKFD_COMMAND_END = 0x22 # macro +class struct_kfd_ioctl_get_version_args(Structure): + pass + +struct_kfd_ioctl_get_version_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_version_args._fields_ = [ + ('major_version', ctypes.c_uint32), + ('minor_version', ctypes.c_uint32), +] + +class struct_kfd_ioctl_create_queue_args(Structure): + pass + +struct_kfd_ioctl_create_queue_args._pack_ = 1 # source:False +struct_kfd_ioctl_create_queue_args._fields_ = [ + ('ring_base_address', ctypes.c_uint64), + ('write_pointer_address', ctypes.c_uint64), + ('read_pointer_address', ctypes.c_uint64), + ('doorbell_offset', ctypes.c_uint64), + ('ring_size', ctypes.c_uint32), + ('gpu_id', ctypes.c_uint32), + ('queue_type', ctypes.c_uint32), + ('queue_percentage', ctypes.c_uint32), + ('queue_priority', ctypes.c_uint32), + ('queue_id', ctypes.c_uint32), + ('eop_buffer_address', ctypes.c_uint64), + ('eop_buffer_size', ctypes.c_uint64), + ('ctx_save_restore_address', ctypes.c_uint64), + ('ctx_save_restore_size', ctypes.c_uint32), + ('ctl_stack_size', ctypes.c_uint32), +] + +class struct_kfd_ioctl_destroy_queue_args(Structure): + pass + +struct_kfd_ioctl_destroy_queue_args._pack_ = 1 # source:False +struct_kfd_ioctl_destroy_queue_args._fields_ = [ + ('queue_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_update_queue_args(Structure): + pass + +struct_kfd_ioctl_update_queue_args._pack_ = 1 # source:False +struct_kfd_ioctl_update_queue_args._fields_ = [ + ('ring_base_address', ctypes.c_uint64), + ('queue_id', ctypes.c_uint32), + ('ring_size', ctypes.c_uint32), + ('queue_percentage', ctypes.c_uint32), + ('queue_priority', ctypes.c_uint32), +] + +class struct_kfd_ioctl_set_cu_mask_args(Structure): + pass + +struct_kfd_ioctl_set_cu_mask_args._pack_ = 1 # source:False +struct_kfd_ioctl_set_cu_mask_args._fields_ = [ + ('queue_id', ctypes.c_uint32), + ('num_cu_mask', ctypes.c_uint32), + ('cu_mask_ptr', ctypes.c_uint64), +] + +class struct_kfd_ioctl_get_queue_wave_state_args(Structure): + pass + +struct_kfd_ioctl_get_queue_wave_state_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_queue_wave_state_args._fields_ = [ + ('ctl_stack_address', ctypes.c_uint64), + ('ctl_stack_used_size', ctypes.c_uint32), + ('save_area_used_size', ctypes.c_uint32), + ('queue_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_set_memory_policy_args(Structure): + pass + +struct_kfd_ioctl_set_memory_policy_args._pack_ = 1 # source:False +struct_kfd_ioctl_set_memory_policy_args._fields_ = [ + ('alternate_aperture_base', ctypes.c_uint64), + ('alternate_aperture_size', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('default_policy', ctypes.c_uint32), + ('alternate_policy', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_get_clock_counters_args(Structure): + pass + +struct_kfd_ioctl_get_clock_counters_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_clock_counters_args._fields_ = [ + ('gpu_clock_counter', ctypes.c_uint64), + ('cpu_clock_counter', ctypes.c_uint64), + ('system_clock_counter', ctypes.c_uint64), + ('system_clock_freq', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_process_device_apertures(Structure): + pass + +struct_kfd_process_device_apertures._pack_ = 1 # source:False +struct_kfd_process_device_apertures._fields_ = [ + ('lds_base', ctypes.c_uint64), + ('lds_limit', ctypes.c_uint64), + ('scratch_base', ctypes.c_uint64), + ('scratch_limit', ctypes.c_uint64), + ('gpuvm_base', ctypes.c_uint64), + ('gpuvm_limit', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_get_process_apertures_args(Structure): + pass + +struct_kfd_ioctl_get_process_apertures_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_process_apertures_args._fields_ = [ + ('process_apertures', struct_kfd_process_device_apertures * 7), + ('num_of_nodes', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_get_process_apertures_new_args(Structure): + pass + +struct_kfd_ioctl_get_process_apertures_new_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_process_apertures_new_args._fields_ = [ + ('kfd_process_device_apertures_ptr', ctypes.c_uint64), + ('num_of_nodes', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_dbg_register_args(Structure): + pass + +struct_kfd_ioctl_dbg_register_args._pack_ = 1 # source:False +struct_kfd_ioctl_dbg_register_args._fields_ = [ + ('gpu_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_dbg_unregister_args(Structure): + pass + +struct_kfd_ioctl_dbg_unregister_args._pack_ = 1 # source:False +struct_kfd_ioctl_dbg_unregister_args._fields_ = [ + ('gpu_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_dbg_address_watch_args(Structure): + pass + +struct_kfd_ioctl_dbg_address_watch_args._pack_ = 1 # source:False +struct_kfd_ioctl_dbg_address_watch_args._fields_ = [ + ('content_ptr', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('buf_size_in_bytes', ctypes.c_uint32), +] + +class struct_kfd_ioctl_dbg_wave_control_args(Structure): + pass + +struct_kfd_ioctl_dbg_wave_control_args._pack_ = 1 # source:False +struct_kfd_ioctl_dbg_wave_control_args._fields_ = [ + ('content_ptr', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('buf_size_in_bytes', ctypes.c_uint32), +] + +class struct_kfd_ioctl_create_event_args(Structure): + pass + +struct_kfd_ioctl_create_event_args._pack_ = 1 # source:False +struct_kfd_ioctl_create_event_args._fields_ = [ + ('event_page_offset', ctypes.c_uint64), + ('event_trigger_data', ctypes.c_uint32), + ('event_type', ctypes.c_uint32), + ('auto_reset', ctypes.c_uint32), + ('node_id', ctypes.c_uint32), + ('event_id', ctypes.c_uint32), + ('event_slot_index', ctypes.c_uint32), +] + +class struct_kfd_ioctl_destroy_event_args(Structure): + pass + +struct_kfd_ioctl_destroy_event_args._pack_ = 1 # source:False +struct_kfd_ioctl_destroy_event_args._fields_ = [ + ('event_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_set_event_args(Structure): + pass + +struct_kfd_ioctl_set_event_args._pack_ = 1 # source:False +struct_kfd_ioctl_set_event_args._fields_ = [ + ('event_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_reset_event_args(Structure): + pass + +struct_kfd_ioctl_reset_event_args._pack_ = 1 # source:False +struct_kfd_ioctl_reset_event_args._fields_ = [ + ('event_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_memory_exception_failure(Structure): + pass + +struct_kfd_memory_exception_failure._pack_ = 1 # source:False +struct_kfd_memory_exception_failure._fields_ = [ + ('NotPresent', ctypes.c_uint32), + ('ReadOnly', ctypes.c_uint32), + ('NoExecute', ctypes.c_uint32), + ('imprecise', ctypes.c_uint32), +] + +class struct_kfd_hsa_memory_exception_data(Structure): + pass + +struct_kfd_hsa_memory_exception_data._pack_ = 1 # source:False +struct_kfd_hsa_memory_exception_data._fields_ = [ + ('failure', struct_kfd_memory_exception_failure), + ('va', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('ErrorType', ctypes.c_uint32), +] + +class struct_kfd_hsa_hw_exception_data(Structure): + pass + +struct_kfd_hsa_hw_exception_data._pack_ = 1 # source:False +struct_kfd_hsa_hw_exception_data._fields_ = [ + ('reset_type', ctypes.c_uint32), + ('reset_cause', ctypes.c_uint32), + ('memory_lost', ctypes.c_uint32), + ('gpu_id', ctypes.c_uint32), +] + +class struct_kfd_event_data(Structure): + pass + +class union_kfd_event_data_0(Union): + pass + +union_kfd_event_data_0._pack_ = 1 # source:False +union_kfd_event_data_0._fields_ = [ + ('memory_exception_data', struct_kfd_hsa_memory_exception_data), + ('hw_exception_data', struct_kfd_hsa_hw_exception_data), + ('PADDING_0', ctypes.c_ubyte * 16), +] + +struct_kfd_event_data._pack_ = 1 # source:False +struct_kfd_event_data._anonymous_ = ('_0',) +struct_kfd_event_data._fields_ = [ + ('_0', union_kfd_event_data_0), + ('kfd_event_data_ext', ctypes.c_uint64), + ('event_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_wait_events_args(Structure): + pass + +struct_kfd_ioctl_wait_events_args._pack_ = 1 # source:False +struct_kfd_ioctl_wait_events_args._fields_ = [ + ('events_ptr', ctypes.c_uint64), + ('num_events', ctypes.c_uint32), + ('wait_for_all', ctypes.c_uint32), + ('timeout', ctypes.c_uint32), + ('wait_result', ctypes.c_uint32), +] + +class struct_kfd_ioctl_set_scratch_backing_va_args(Structure): + pass + +struct_kfd_ioctl_set_scratch_backing_va_args._pack_ = 1 # source:False +struct_kfd_ioctl_set_scratch_backing_va_args._fields_ = [ + ('va_addr', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_get_tile_config_args(Structure): + pass + +struct_kfd_ioctl_get_tile_config_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_tile_config_args._fields_ = [ + ('tile_config_ptr', ctypes.c_uint64), + ('macro_tile_config_ptr', ctypes.c_uint64), + ('num_tile_configs', ctypes.c_uint32), + ('num_macro_tile_configs', ctypes.c_uint32), + ('gpu_id', ctypes.c_uint32), + ('gb_addr_config', ctypes.c_uint32), + ('num_banks', ctypes.c_uint32), + ('num_ranks', ctypes.c_uint32), +] + +class struct_kfd_ioctl_set_trap_handler_args(Structure): + pass + +struct_kfd_ioctl_set_trap_handler_args._pack_ = 1 # source:False +struct_kfd_ioctl_set_trap_handler_args._fields_ = [ + ('tba_addr', ctypes.c_uint64), + ('tma_addr', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_acquire_vm_args(Structure): + pass + +struct_kfd_ioctl_acquire_vm_args._pack_ = 1 # source:False +struct_kfd_ioctl_acquire_vm_args._fields_ = [ + ('drm_fd', ctypes.c_uint32), + ('gpu_id', ctypes.c_uint32), +] + +class struct_kfd_ioctl_alloc_memory_of_gpu_args(Structure): + pass + +struct_kfd_ioctl_alloc_memory_of_gpu_args._pack_ = 1 # source:False +struct_kfd_ioctl_alloc_memory_of_gpu_args._fields_ = [ + ('va_addr', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('handle', ctypes.c_uint64), + ('mmap_offset', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('flags', ctypes.c_uint32), +] + +class struct_kfd_ioctl_free_memory_of_gpu_args(Structure): + pass + +struct_kfd_ioctl_free_memory_of_gpu_args._pack_ = 1 # source:False +struct_kfd_ioctl_free_memory_of_gpu_args._fields_ = [ + ('handle', ctypes.c_uint64), +] + +class struct_kfd_ioctl_map_memory_to_gpu_args(Structure): + pass + +struct_kfd_ioctl_map_memory_to_gpu_args._pack_ = 1 # source:False +struct_kfd_ioctl_map_memory_to_gpu_args._fields_ = [ + ('handle', ctypes.c_uint64), + ('device_ids_array_ptr', ctypes.c_uint64), + ('n_devices', ctypes.c_uint32), + ('n_success', ctypes.c_uint32), +] + +class struct_kfd_ioctl_unmap_memory_from_gpu_args(Structure): + pass + +struct_kfd_ioctl_unmap_memory_from_gpu_args._pack_ = 1 # source:False +struct_kfd_ioctl_unmap_memory_from_gpu_args._fields_ = [ + ('handle', ctypes.c_uint64), + ('device_ids_array_ptr', ctypes.c_uint64), + ('n_devices', ctypes.c_uint32), + ('n_success', ctypes.c_uint32), +] + +class struct_kfd_ioctl_alloc_queue_gws_args(Structure): + pass + +struct_kfd_ioctl_alloc_queue_gws_args._pack_ = 1 # source:False +struct_kfd_ioctl_alloc_queue_gws_args._fields_ = [ + ('queue_id', ctypes.c_uint32), + ('num_gws', ctypes.c_uint32), + ('first_gws', ctypes.c_uint32), + ('pad', ctypes.c_uint32), +] + +class struct_kfd_ioctl_get_dmabuf_info_args(Structure): + pass + +struct_kfd_ioctl_get_dmabuf_info_args._pack_ = 1 # source:False +struct_kfd_ioctl_get_dmabuf_info_args._fields_ = [ + ('size', ctypes.c_uint64), + ('metadata_ptr', ctypes.c_uint64), + ('metadata_size', ctypes.c_uint32), + ('gpu_id', ctypes.c_uint32), + ('flags', ctypes.c_uint32), + ('dmabuf_fd', ctypes.c_uint32), +] + +class struct_kfd_ioctl_import_dmabuf_args(Structure): + pass + +struct_kfd_ioctl_import_dmabuf_args._pack_ = 1 # source:False +struct_kfd_ioctl_import_dmabuf_args._fields_ = [ + ('va_addr', ctypes.c_uint64), + ('handle', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('dmabuf_fd', ctypes.c_uint32), +] + + +# values for enumeration 'kfd_smi_event' +kfd_smi_event__enumvalues = { + 0: 'KFD_SMI_EVENT_NONE', + 1: 'KFD_SMI_EVENT_VMFAULT', + 2: 'KFD_SMI_EVENT_THERMAL_THROTTLE', + 3: 'KFD_SMI_EVENT_GPU_PRE_RESET', + 4: 'KFD_SMI_EVENT_GPU_POST_RESET', +} +KFD_SMI_EVENT_NONE = 0 +KFD_SMI_EVENT_VMFAULT = 1 +KFD_SMI_EVENT_THERMAL_THROTTLE = 2 +KFD_SMI_EVENT_GPU_PRE_RESET = 3 +KFD_SMI_EVENT_GPU_POST_RESET = 4 +kfd_smi_event = ctypes.c_uint32 # enum +class struct_kfd_ioctl_smi_events_args(Structure): + pass + +struct_kfd_ioctl_smi_events_args._pack_ = 1 # source:False +struct_kfd_ioctl_smi_events_args._fields_ = [ + ('gpuid', ctypes.c_uint32), + ('anon_fd', ctypes.c_uint32), +] + + +# values for enumeration 'kfd_mmio_remap' +kfd_mmio_remap__enumvalues = { + 0: 'KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL', + 4: 'KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL', +} +KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0 +KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4 +kfd_mmio_remap = ctypes.c_uint32 # enum + +# values for enumeration 'kfd_ioctl_svm_op' +kfd_ioctl_svm_op__enumvalues = { + 0: 'KFD_IOCTL_SVM_OP_SET_ATTR', + 1: 'KFD_IOCTL_SVM_OP_GET_ATTR', +} +KFD_IOCTL_SVM_OP_SET_ATTR = 0 +KFD_IOCTL_SVM_OP_GET_ATTR = 1 +kfd_ioctl_svm_op = ctypes.c_uint32 # enum + +# values for enumeration 'kfd_ioctl_svm_location' +kfd_ioctl_svm_location__enumvalues = { + 0: 'KFD_IOCTL_SVM_LOCATION_SYSMEM', + 4294967295: 'KFD_IOCTL_SVM_LOCATION_UNDEFINED', +} +KFD_IOCTL_SVM_LOCATION_SYSMEM = 0 +KFD_IOCTL_SVM_LOCATION_UNDEFINED = 4294967295 +kfd_ioctl_svm_location = ctypes.c_uint32 # enum + +# values for enumeration 'kfd_ioctl_svm_attr_type' +kfd_ioctl_svm_attr_type__enumvalues = { + 0: 'KFD_IOCTL_SVM_ATTR_PREFERRED_LOC', + 1: 'KFD_IOCTL_SVM_ATTR_PREFETCH_LOC', + 2: 'KFD_IOCTL_SVM_ATTR_ACCESS', + 3: 'KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE', + 4: 'KFD_IOCTL_SVM_ATTR_NO_ACCESS', + 5: 'KFD_IOCTL_SVM_ATTR_SET_FLAGS', + 6: 'KFD_IOCTL_SVM_ATTR_CLR_FLAGS', + 7: 'KFD_IOCTL_SVM_ATTR_GRANULARITY', +} +KFD_IOCTL_SVM_ATTR_PREFERRED_LOC = 0 +KFD_IOCTL_SVM_ATTR_PREFETCH_LOC = 1 +KFD_IOCTL_SVM_ATTR_ACCESS = 2 +KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE = 3 +KFD_IOCTL_SVM_ATTR_NO_ACCESS = 4 +KFD_IOCTL_SVM_ATTR_SET_FLAGS = 5 +KFD_IOCTL_SVM_ATTR_CLR_FLAGS = 6 +KFD_IOCTL_SVM_ATTR_GRANULARITY = 7 +kfd_ioctl_svm_attr_type = ctypes.c_uint32 # enum +class struct_kfd_ioctl_svm_attribute(Structure): + pass + +struct_kfd_ioctl_svm_attribute._pack_ = 1 # source:False +struct_kfd_ioctl_svm_attribute._fields_ = [ + ('type', ctypes.c_uint32), + ('value', ctypes.c_uint32), +] + +class struct_kfd_ioctl_svm_args(Structure): + pass + +struct_kfd_ioctl_svm_args._pack_ = 1 # source:False +struct_kfd_ioctl_svm_args._fields_ = [ + ('start_addr', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('op', ctypes.c_uint32), + ('nattr', ctypes.c_uint32), + ('attrs', struct_kfd_ioctl_svm_attribute * 0), +] + +class struct_kfd_ioctl_set_xnack_mode_args(Structure): + pass + +struct_kfd_ioctl_set_xnack_mode_args._pack_ = 1 # source:False +struct_kfd_ioctl_set_xnack_mode_args._fields_ = [ + ('xnack_enabled', ctypes.c_int32), +] + +__all__ = \ + ['AMDKFD_COMMAND_END', 'AMDKFD_COMMAND_START', + 'AMDKFD_IOCTL_BASE', 'KFD_HW_EXCEPTION_ECC', + 'KFD_HW_EXCEPTION_GPU_HANG', 'KFD_HW_EXCEPTION_PER_ENGINE_RESET', + 'KFD_HW_EXCEPTION_WHOLE_GPU_RESET', 'KFD_IOCTL_H_INCLUDED', + 'KFD_IOCTL_MAJOR_VERSION', 'KFD_IOCTL_MINOR_VERSION', + 'KFD_IOCTL_SVM_ATTR_ACCESS', 'KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE', + 'KFD_IOCTL_SVM_ATTR_CLR_FLAGS', 'KFD_IOCTL_SVM_ATTR_GRANULARITY', + 'KFD_IOCTL_SVM_ATTR_NO_ACCESS', + 'KFD_IOCTL_SVM_ATTR_PREFERRED_LOC', + 'KFD_IOCTL_SVM_ATTR_PREFETCH_LOC', 'KFD_IOCTL_SVM_ATTR_SET_FLAGS', + 'KFD_IOCTL_SVM_FLAG_COHERENT', 'KFD_IOCTL_SVM_FLAG_GPU_EXEC', + 'KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY', 'KFD_IOCTL_SVM_FLAG_GPU_RO', + 'KFD_IOCTL_SVM_FLAG_HIVE_LOCAL', 'KFD_IOCTL_SVM_FLAG_HOST_ACCESS', + 'KFD_IOCTL_SVM_LOCATION_SYSMEM', + 'KFD_IOCTL_SVM_LOCATION_UNDEFINED', 'KFD_IOCTL_SVM_OP_GET_ATTR', + 'KFD_IOCTL_SVM_OP_SET_ATTR', + 'KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM', + 'KFD_IOC_ALLOC_MEM_FLAGS_COHERENT', + 'KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL', + 'KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE', + 'KFD_IOC_ALLOC_MEM_FLAGS_GTT', + 'KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP', + 'KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE', + 'KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC', + 'KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED', + 'KFD_IOC_ALLOC_MEM_FLAGS_USERPTR', 'KFD_IOC_ALLOC_MEM_FLAGS_VRAM', + 'KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE', + 'KFD_IOC_CACHE_POLICY_COHERENT', + 'KFD_IOC_CACHE_POLICY_NONCOHERENT', 'KFD_IOC_EVENT_DEBUG_EVENT', + 'KFD_IOC_EVENT_DEVICESTATECHANGE', 'KFD_IOC_EVENT_HW_EXCEPTION', + 'KFD_IOC_EVENT_MEMORY', 'KFD_IOC_EVENT_NODECHANGE', + 'KFD_IOC_EVENT_PROFILE_EVENT', 'KFD_IOC_EVENT_QUEUE_EVENT', + 'KFD_IOC_EVENT_SIGNAL', 'KFD_IOC_EVENT_SYSTEM_EVENT', + 'KFD_IOC_QUEUE_TYPE_COMPUTE', 'KFD_IOC_QUEUE_TYPE_COMPUTE_AQL', + 'KFD_IOC_QUEUE_TYPE_SDMA', 'KFD_IOC_QUEUE_TYPE_SDMA_XGMI', + 'KFD_IOC_WAIT_RESULT_COMPLETE', 'KFD_IOC_WAIT_RESULT_FAIL', + 'KFD_IOC_WAIT_RESULT_TIMEOUT', 'KFD_MAX_QUEUE_PERCENTAGE', + 'KFD_MAX_QUEUE_PRIORITY', 'KFD_MEM_ERR_GPU_HANG', + 'KFD_MEM_ERR_NO_RAS', 'KFD_MEM_ERR_POISON_CONSUMED', + 'KFD_MEM_ERR_SRAM_ECC', 'KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL', + 'KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL', 'KFD_SIGNAL_EVENT_LIMIT', + 'KFD_SMI_EVENT_GPU_POST_RESET', 'KFD_SMI_EVENT_GPU_PRE_RESET', + 'KFD_SMI_EVENT_NONE', 'KFD_SMI_EVENT_THERMAL_THROTTLE', + 'KFD_SMI_EVENT_VMFAULT', 'MAX_ALLOWED_AW_BUFF_SIZE', + 'MAX_ALLOWED_NUM_POINTS', 'MAX_ALLOWED_WAC_BUFF_SIZE', + 'NUM_OF_SUPPORTED_GPUS', 'kfd_ioctl_svm_attr_type', + 'kfd_ioctl_svm_location', 'kfd_ioctl_svm_op', 'kfd_mmio_remap', + 'kfd_smi_event', 'struct_kfd_event_data', + 'struct_kfd_hsa_hw_exception_data', + 'struct_kfd_hsa_memory_exception_data', + 'struct_kfd_ioctl_acquire_vm_args', + 'struct_kfd_ioctl_alloc_memory_of_gpu_args', + 'struct_kfd_ioctl_alloc_queue_gws_args', + 'struct_kfd_ioctl_create_event_args', + 'struct_kfd_ioctl_create_queue_args', + 'struct_kfd_ioctl_dbg_address_watch_args', + 'struct_kfd_ioctl_dbg_register_args', + 'struct_kfd_ioctl_dbg_unregister_args', + 'struct_kfd_ioctl_dbg_wave_control_args', + 'struct_kfd_ioctl_destroy_event_args', + 'struct_kfd_ioctl_destroy_queue_args', + 'struct_kfd_ioctl_free_memory_of_gpu_args', + 'struct_kfd_ioctl_get_clock_counters_args', + 'struct_kfd_ioctl_get_dmabuf_info_args', + 'struct_kfd_ioctl_get_process_apertures_args', + 'struct_kfd_ioctl_get_process_apertures_new_args', + 'struct_kfd_ioctl_get_queue_wave_state_args', + 'struct_kfd_ioctl_get_tile_config_args', + 'struct_kfd_ioctl_get_version_args', + 'struct_kfd_ioctl_import_dmabuf_args', + 'struct_kfd_ioctl_map_memory_to_gpu_args', + 'struct_kfd_ioctl_reset_event_args', + 'struct_kfd_ioctl_set_cu_mask_args', + 'struct_kfd_ioctl_set_event_args', + 'struct_kfd_ioctl_set_memory_policy_args', + 'struct_kfd_ioctl_set_scratch_backing_va_args', + 'struct_kfd_ioctl_set_trap_handler_args', + 'struct_kfd_ioctl_set_xnack_mode_args', + 'struct_kfd_ioctl_smi_events_args', 'struct_kfd_ioctl_svm_args', + 'struct_kfd_ioctl_svm_attribute', + 'struct_kfd_ioctl_unmap_memory_from_gpu_args', + 'struct_kfd_ioctl_update_queue_args', + 'struct_kfd_ioctl_wait_events_args', + 'struct_kfd_memory_exception_failure', + 'struct_kfd_process_device_apertures', 'union_kfd_event_data_0'] diff --git a/tinygrad/runtime/ops_kfd.py b/tinygrad/runtime/ops_kfd.py new file mode 100644 index 0000000000..a3f4c53b32 --- /dev/null +++ b/tinygrad/runtime/ops_kfd.py @@ -0,0 +1,333 @@ +from __future__ import annotations +from typing import Tuple +import os, fcntl, ctypes, functools, re, pathlib, mmap, struct +from tinygrad.device import Compiled, LRUAllocator, Compiler, BufferOptions, CompilerOptions +from tinygrad.helpers import getenv, from_mv, init_c_struct_t, to_mv, round_up +from tinygrad.renderer.cstyle import HIPRenderer +from tinygrad.runtime.driver.hip_comgr import compile_hip +import tinygrad.runtime.autogen.kfd as kfd +import tinygrad.runtime.autogen.hsa as hsa +import tinygrad.runtime.autogen.amd_sdma as amd_sdma +if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 + +libc = ctypes.CDLL("libc.so.6") +libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long] +libc.mmap.restype = ctypes.c_void_p + +def node_sysfs_path(node_id, file): return f"/sys/devices/virtual/kfd/kfd/topology/nodes/{node_id}/{file}" + +def kfd_ioctl(idir, nr, user_struct, fd, made_struct=None, **kwargs): + made = made_struct or user_struct(**kwargs) + ret = fcntl.ioctl(fd, (idir<<30) | (ctypes.sizeof(made)<<16) | (ord('K')<<8) | nr, made) + if ret != 0: raise RuntimeError(f"ioctl returned {ret}") + return made + +def ioctls_from_header(): + #hdr = pathlib.Path("/usr/include/linux/kfd_ioctl.h").read_text().replace("\\\n", "") + #pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_(IOW?R?)\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)' + #matches = re.findall(pattern, hdr, re.MULTILINE) + # get this from python instead + hdrpy = (pathlib.Path(__file__).parent / "autogen" / "kfd.py").read_text() + pattern = r'# (AMDKFD_IOC_[A-Z0-9_]+)\s=\s_(IOW?R?).*\(( 0x[0-9a-fA-F]+) ,\s+struct\s([A-Za-z0-9_]+)\s+\)' + matches = re.findall(pattern, hdrpy, re.MULTILINE) + idirs = {"IOW": 1, "IOR": 2, "IOWR": 3} + fxns = {name.replace("AMDKFD_IOC_", "").lower(): + functools.partial(kfd_ioctl, idirs[idir], int(nr, 0x10), getattr(kfd, "struct_"+sname)) + for name, idir, nr, sname in matches} + return type("KIO", (object, ), fxns) +kio = ioctls_from_header() + +def create_sdma_packets(): + # TODO: clean up this, if we want to keep it + structs = {} + for name,pkt in [(name,s) for name,s in amd_sdma.__dict__.items() if name.startswith("struct_SDMA_PKT_") and name.endswith("_TAG")]: + names = set() + fields = [] + for pkt_fields in pkt._fields_: + if not pkt_fields[0].endswith("_UNION"): fields.append(pkt_fields) + else: + assert pkt_fields[1]._fields_[0][0] == '_0' + for union_fields in pkt_fields[1]._fields_[0][1]._fields_: + fname = union_fields[0] + if fname in names: fname = pkt_fields[0]+fname + names.add(fname) + if fname.endswith("_63_32") and fields[-1][0].endswith("_31_0"): + fields[-1] = tuple([fname[:-6], ctypes.c_ulong, 64]) # merge together 64-bit fields + else: + fields.append(tuple([fname, *union_fields[1:]])) + new_name = name[16:-4].lower() + structs[new_name] = init_c_struct_t(tuple(fields)) + assert ctypes.sizeof(structs[new_name]) == ctypes.sizeof(pkt), f"{ctypes.sizeof(structs[new_name])} != {ctypes.sizeof(pkt)}" + return type("SDMA_PKTS", (object, ), structs) +sdma_pkts = create_sdma_packets() + +class KFDCompiler(Compiler): + compiler_opts = CompilerOptions("KFD", has_tensor_cores=True, shared_max=65536) + def __init__(self, arch:str): + self.arch = arch + super().__init__(f"compile_hip_{self.arch}") + def render(self, name:str, uops) -> str: return HIPRenderer(name, uops) + def compile(self, src:str) -> bytes: return compile_hip(src, self.arch) + +AQL_PACKET_SIZE = ctypes.sizeof(hsa.hsa_kernel_dispatch_packet_t) +SDMA_MAX_COPY_SIZE = 0x400000 + +DISPATCH_KERNEL_SETUP = 3 << hsa.HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS +DISPATCH_KERNEL_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER +DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE +DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE +DISPATCH_KERNEL_HEADER |= hsa.HSA_PACKET_TYPE_KERNEL_DISPATCH << hsa.HSA_PACKET_HEADER_TYPE + +SHT_PROGBITS = 0x1 +SHF_ALLOC = 0x2 + +class KFDProgram: + def __init__(self, device:KFDDevice, name:str, lib:bytes): + # TODO; this API needs the type signature of the function and global_size/local_size + self.device, self.name, self.lib = device, name, lib + + _phoff, _shoff, _flags, _ehsize, _phentsize, _phnum, _shentsize, _shnum, _shstrndx = struct.unpack_from(" {self.device.max_private_segment_size=}" + + # NOTE: no programs are ever freed + def __del__(self): kio.free_memory_of_gpu(KFDDevice.kfd, handle=self.lib_gpu.handle) + + def __call__(self, *args, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), vals:Tuple[int, ...]=(), wait=False): + if not hasattr(self, "args_struct_t"): + self.args_struct_t = init_c_struct_t(tuple([(f'f{i}', ctypes.c_void_p) for i in range(len(args))] + + [(f'v{i}', ctypes.c_int) for i in range(len(vals))])) + if ctypes.sizeof(self.args_struct_t) != self.kernargs_segment_size: + raise RuntimeError(f"HSAProgram.__call__: incorrect args struct size {ctypes.sizeof(self.args_struct_t)} != {self.kernargs_segment_size}") + args_st = self.args_struct_t.from_address(self.device.kernargs.va_addr) + for i in range(len(args)): args_st.__setattr__(f'f{i}', args[i].va_addr) + for i in range(len(vals)): args_st.__setattr__(f'v{i}', vals[i]) + + self.device.completion_signal.value = 1 # reset the signal before call + packet = hsa.hsa_kernel_dispatch_packet_t.from_address(self.device.aql_ring.va_addr + + (self.device.aql_doorbell_value*AQL_PACKET_SIZE) % self.device.aql_ring.size) + packet.workgroup_size_x, packet.workgroup_size_y, packet.workgroup_size_z = local_size + packet.reserved0 = 0 + packet.grid_size_x, packet.grid_size_y, packet.grid_size_z = tuple(g*l for g,l in zip(global_size, local_size)) + packet.kernel_object = self.handle + packet.kernarg_address = self.device.kernargs.va_addr + packet.group_segment_size = self.group_segment_size + packet.private_segment_size = self.private_segment_size # what it this and why doesn't it work? (see TestOps.test_dilated_conv_transpose2d) + packet.reserved2 = 0 + packet.completion_signal = hsa.hsa_signal_t(ctypes.addressof(self.device.completion_signal)) + packet.setup = DISPATCH_KERNEL_SETUP + packet.header = DISPATCH_KERNEL_HEADER + + # one pending packet + ring doorbell + self.device.amd_aql_queue.write_dispatch_id = self.device.aql_doorbell_value + 1 + self.device.aql_doorbell[0] = self.device.aql_doorbell_value + self.device.aql_doorbell_value += 1 + + evt_arr = (kfd.struct_kfd_event_data * 1)() + evt_arr[0].event_id = self.device.completion_signal.event_id + kio.wait_events(KFDDevice.kfd, events_ptr=ctypes.addressof(evt_arr), num_events=1, wait_for_all=1, timeout=1000) + + assert (wp:=self.device.amd_aql_queue.write_dispatch_id) == (rp:=self.device.amd_aql_queue.read_dispatch_id), f"didn't run {wp} != {rp}" + if wait: return (self.device.completion_signal.end_ts-self.device.completion_signal.start_ts)/1e9 + +class KFDAllocator(LRUAllocator): + def __init__(self, device:KFDDevice): + self.device = device + super().__init__() + + def _alloc(self, size:int, options:BufferOptions): + if options.host: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, public=True) + else: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM, public=True) + + def copyin(self, dest, src: memoryview): + # TODO: need to make the address visible to gpu and pass it directly to sdma. + self.device._map_userptr_to_gpu(ctypes.addressof(from_mv(src).contents), src.nbytes) + self.device.completion_signal.value = 1 + self.device._submit_sdma(dest.va_addr, ctypes.addressof(from_mv(src).contents), src.nbytes, completion_signal=self.device.completion_signal) + evt_arr = (kfd.struct_kfd_event_data * 1)() + evt_arr[0].event_id = self.device.completion_signal.event_id + kio.wait_events(KFDDevice.kfd, events_ptr=ctypes.addressof(evt_arr), num_events=1, wait_for_all=1, timeout=1000) + + def copyout(self, dest:memoryview, src): + self.device._map_userptr_to_gpu(ctypes.addressof(from_mv(dest).contents), dest.nbytes) + self.device.completion_signal.value = 1 + self.device._submit_sdma(ctypes.addressof(from_mv(dest).contents), src.va_addr, dest.nbytes, completion_signal=self.device.completion_signal) + evt_arr = (kfd.struct_kfd_event_data * 1)() + evt_arr[0].event_id = self.device.completion_signal.event_id + kio.wait_events(KFDDevice.kfd, events_ptr=ctypes.addressof(evt_arr), num_events=1, wait_for_all=1, timeout=1000) + +MAP_FIXED, MAP_NORESERVE = 0x10, 0x400 +class KFDDevice(Compiled): + kfd:int = -1 + + def _map_userptr_to_gpu(self, addr, size): + self.map_uptr2gpu_struct.start_addr = addr&~0xfff + self.map_uptr2gpu_struct.size = round_up(size+addr-(addr&~0xfff), 0x1000) + kio.svm(self.kfd, made_struct=self.map_uptr2gpu_struct) + + def _gpu_alloc(self, size:int, flags:int, uncached=False, public=False, map_to_gpu=True): + flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE + if uncached: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED + if public: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC + if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR: + buf = addr = libc.mmap(0, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS, -1, 0) + else: + buf, addr = 0, libc.mmap(0, size, 0, mmap.MAP_PRIVATE|mmap.MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) + assert addr != 0xffffffffffffffff + mem = kio.alloc_memory_of_gpu(self.kfd, va_addr=addr, size=size, gpu_id=self.gpu_id, flags=flags, mmap_offset=buf) + if not (flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR): + buf = libc.mmap(mem.va_addr, mem.size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|MAP_FIXED, self.drm_fd, mem.mmap_offset) + assert buf != 0xffffffffffffffff + assert addr == buf == mem.va_addr + if map_to_gpu: + arr = (ctypes.c_int32 * 1)(self.gpu_id) + stm = kio.map_memory_to_gpu(self.kfd, handle=mem.handle, device_ids_array_ptr=ctypes.addressof(arr), n_devices=1) + assert stm.n_success == 1 + return mem + + def __init__(self, device:str=""): + if KFDDevice.kfd == -1: KFDDevice.kfd = os.open("/dev/kfd", os.O_RDWR) + self.device_id = int(device.split(":")[1]) if ":" in device else 0 + with open(node_sysfs_path(self.device_id+1, "gpu_id"), "r") as f: self.gpu_id = int(f.read()) + with open(node_sysfs_path(self.device_id+1, "properties"), "r") as f: self.properties = {line.split()[0]: int(line.split()[1]) for line in f} + self.drm_fd = os.open(f"/dev/dri/renderD{self.properties['drm_render_minor']}", os.O_RDWR) + self.arch = f"gfx{self.properties['gfx_target_version']//100}" + kio.acquire_vm(KFDDevice.kfd, drm_fd=self.drm_fd, gpu_id=self.gpu_id) + + self.event_page = self._gpu_alloc(0x8000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True) + self.sync_event = kio.create_event(KFDDevice.kfd, event_page_offset=self.event_page.handle, auto_reset=1) + self.eop_buffer = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + self.aql_ring = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True) + self.signals_page = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True) + self.gart = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True) + self.kernargs = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + self.ctx_save_restore_address = self._gpu_alloc(0x2C02000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + + self.completion_signal = hsa.amd_signal_t.from_address(self.signals_page.va_addr) + self.completion_signal.value = 1 + self.completion_signal.kind = hsa.AMD_SIGNAL_KIND_USER + self.completion_signal.event_mailbox_ptr = self.event_page.va_addr + self.sync_event.event_slot_index*8 + self.completion_signal.event_id = self.sync_event.event_id + + # AQL Queue + self.amd_aql_queue = hsa.amd_queue_t.from_address(self.gart.va_addr) + self.amd_aql_queue.write_dispatch_id = 0 + self.amd_aql_queue.read_dispatch_id = 0 + self.amd_aql_queue.read_dispatch_id_field_base_byte_offset = getattr(hsa.amd_queue_t, 'read_dispatch_id').offset + self.amd_aql_queue.queue_properties = hsa.AMD_QUEUE_PROPERTIES_IS_PTR64 | hsa.AMD_QUEUE_PROPERTIES_ENABLE_PROFILING + + self.amd_aql_queue.max_cu_id = self.properties['simd_count'] // self.properties['simd_per_cu'] - 1 + self.amd_aql_queue.max_wave_id = self.properties['max_waves_per_simd'] * self.properties['simd_per_cu'] - 1 + + # scratch setup + self.max_private_segment_size = 256 + self.scratch_len = self.max_private_segment_size * (self.amd_aql_queue.max_cu_id + 1) * (self.amd_aql_queue.max_wave_id + 1) + self.scratch = self._gpu_alloc(self.scratch_len, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + self.amd_aql_queue.scratch_backing_memory_location = self.scratch.va_addr + self.amd_aql_queue.scratch_backing_memory_byte_size = self.scratch_len + self.amd_aql_queue.scratch_wave64_lane_byte_size = self.max_private_segment_size * (self.amd_aql_queue.max_wave_id + 1) // 64 + self.amd_aql_queue.scratch_resource_descriptor[0] = self.scratch.va_addr & 0xFFFFFFFF + self.amd_aql_queue.scratch_resource_descriptor[1] = ((self.scratch.va_addr >> 32) & 0xFFFF) | (1 << 30) # va_hi | SWIZZLE_ENABLE + self.amd_aql_queue.scratch_resource_descriptor[2] = self.scratch_len & 0xFFFFFFFF + self.amd_aql_queue.scratch_resource_descriptor[3] = 0x20814fac # FORMAT=BUF_FORMAT_32_UINT,OOB_SELECT=2,ADD_TID_ENABLE=1,TYPE=SQ_RSRC_BUF,SQ_SELs + + wave_scratch = (((self.amd_aql_queue.max_wave_id + 1) * self.max_private_segment_size + 255) // 256) + self.amd_aql_queue.compute_tmpring_size = wave_scratch << 12 | (self.amd_aql_queue.max_cu_id + 1) + + self.aql_queue = kio.create_queue(KFDDevice.kfd, ring_base_address=self.aql_ring.va_addr, ring_size=self.aql_ring.size, gpu_id=self.gpu_id, + queue_type=kfd.KFD_IOC_QUEUE_TYPE_COMPUTE_AQL, queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE, queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY, + eop_buffer_address=self.eop_buffer.va_addr, eop_buffer_size=self.eop_buffer.size, + ctx_save_restore_address=self.ctx_save_restore_address.va_addr, ctx_save_restore_size=self.ctx_save_restore_address.size, + ctl_stack_size = 0xa000, + write_pointer_address=self.gart.va_addr + getattr(hsa.amd_queue_t, 'write_dispatch_id').offset, + read_pointer_address=self.gart.va_addr + getattr(hsa.amd_queue_t, 'read_dispatch_id').offset) + + self.doorbells_base = self.aql_queue.doorbell_offset & (~0xfff) + self.doorbells = libc.mmap(0, 8192, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, KFDDevice.kfd, self.doorbells_base) + self.aql_doorbell = to_mv(self.doorbells + self.aql_queue.doorbell_offset - self.doorbells_base, 4).cast("I") + self.aql_doorbell_value = 0 + + # SDMA Queue + self.sdma_ring = self._gpu_alloc(1 << 20, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True) + self.sdma_queue = kio.create_queue(KFDDevice.kfd, ring_base_address=self.sdma_ring.va_addr, ring_size=self.sdma_ring.size, gpu_id=self.gpu_id, + queue_type=kfd.KFD_IOC_QUEUE_TYPE_SDMA, queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE, queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY, + write_pointer_address=self.gart.va_addr + 0x100, read_pointer_address=self.gart.va_addr + 0x108) + + self.sdma_read_pointer = to_mv(self.sdma_queue.read_pointer_address, 8).cast("Q") + self.sdma_write_pointer = to_mv(self.sdma_queue.write_pointer_address, 8).cast("Q") + self.sdma_doorbell = to_mv(self.doorbells + self.sdma_queue.doorbell_offset - self.doorbells_base, 4).cast("I") + self.sdma_doorbell_value = 0 + + # prebuilt packets + self.sdma_flush_hdp_pkt = sdma_pkts.hdp_flush(0x8, 0x0, 0x80000000, 0x0, 0x0, 0x0) + self.sdma_cache_inv = sdma_pkts.gcr(op=amd_sdma.SDMA_OP_GCR, sub_op=amd_sdma.SDMA_SUBOP_USER_GCR, GCR_CONTROL_GL2_WB=1, GCR_CONTROL_GLK_WB=1, + GCR_CONTROL_GL2_INV=1, GCR_CONTROL_GL1_INV=1, GCR_CONTROL_GLV_INV=1, GCR_CONTROL_GLK_INV=1, + GCR_CONTROL_GL2_RANGE=0) + self.sdma_cache_wb = sdma_pkts.gcr(op=amd_sdma.SDMA_OP_GCR, sub_op=amd_sdma.SDMA_SUBOP_USER_GCR, GCR_CONTROL_GL2_WB=1, GCR_CONTROL_GLK_WB=1, + GCR_CONTROL_GL2_RANGE=0) + + # Helpers + map_uptr2gpu_struct_t = init_c_struct_t(tuple(kfd.struct_kfd_ioctl_svm_args._fields_[:-1]+[('attrs', kfd.struct_kfd_ioctl_svm_attribute*2)])) # type: ignore + self.map_uptr2gpu_struct = map_uptr2gpu_struct_t(nattr=2, op=0x0) + self.map_uptr2gpu_struct.attrs[0].type = kfd.KFD_IOCTL_SVM_ATTR_SET_FLAGS + self.map_uptr2gpu_struct.attrs[0].value = kfd.KFD_IOCTL_SVM_FLAG_COHERENT + self.map_uptr2gpu_struct.attrs[1].type = kfd.KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE + self.map_uptr2gpu_struct.attrs[1].value = self.gpu_id + + super().__init__(device, KFDAllocator(self), KFDCompiler(self.arch), functools.partial(KFDProgram, self)) + + def _submit_sdma(self, dest, src, copy_size, wait_signals=None, completion_signal=None): + def blit_sdma_command(cmd): + ctypes.memmove(self.sdma_ring.va_addr + (self.sdma_doorbell_value % self.sdma_ring.size), ctypes.addressof(cmd), sz:=ctypes.sizeof(cmd)) + self.sdma_doorbell_value += sz + + if wait_signals is not None: + # NOTE: we check only low 32 bits to be zeroed, we don't use higher values for signals + for sig in wait_signals: + poll_addr = ctypes.addressof(sig) + getattr(hsa.amd_signal_t, 'value').offset + blit_sdma_command(sdma_pkts.poll_regmem(op=amd_sdma.SDMA_OP_POLL_REGMEM, mem_poll=1, func=0x3, addr=poll_addr, + value=0, mask=0xffffffff, interval=0x04, retry_count=0xfff)) + + if completion_signal is not None: + blit_sdma_command(sdma_pkts.timestamp(op=amd_sdma.SDMA_OP_TIMESTAMP, sub_op=amd_sdma.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL, + addr=ctypes.addressof(completion_signal) + getattr(hsa.amd_signal_t, 'start_ts').offset)) + blit_sdma_command(self.sdma_flush_hdp_pkt) + blit_sdma_command(self.sdma_cache_inv) + + copied = 0 + copies_commands = (copy_size + SDMA_MAX_COPY_SIZE - 1) // SDMA_MAX_COPY_SIZE + for _ in range(copies_commands): + step_copy_size = min(copy_size - copied, SDMA_MAX_COPY_SIZE) + blit_sdma_command(sdma_pkts.copy_linear(op=amd_sdma.SDMA_OP_COPY, sub_op=amd_sdma.SDMA_SUBOP_COPY_LINEAR, + count=step_copy_size-1, src_addr=src+copied, dst_addr=dest+copied)) + copied += step_copy_size + + blit_sdma_command(self.sdma_cache_wb) + if completion_signal is not None: + blit_sdma_command(sdma_pkts.timestamp(op=amd_sdma.SDMA_OP_TIMESTAMP, sub_op=amd_sdma.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL, + addr=ctypes.addressof(completion_signal) + getattr(hsa.amd_signal_t, 'end_ts').offset)) + + if completion_signal is not None: + signal_addr = ctypes.addressof(completion_signal) + getattr(hsa.amd_signal_t, 'value').offset + blit_sdma_command(sdma_pkts.atomic(op=amd_sdma.SDMA_OP_ATOMIC, operation=amd_sdma.SDMA_ATOMIC_ADD64, addr=signal_addr, src_data=(1<<64)-1)) + if completion_signal.event_mailbox_ptr != 0: + blit_sdma_command(sdma_pkts.fence(op=amd_sdma.SDMA_OP_FENCE, mtype=3, addr=completion_signal.event_mailbox_ptr, + data=completion_signal.event_id)) + blit_sdma_command(sdma_pkts.trap(op=amd_sdma.SDMA_OP_TRAP, int_ctx=completion_signal.event_id)) + + self.sdma_write_pointer[0] = self.sdma_doorbell_value + self.sdma_doorbell[0] = self.sdma_doorbell_value