From 8cd22df2dd24bc72a9a1ad324e5430d6c442f41c Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Fri, 23 Jan 2026 00:08:45 +0300 Subject: [PATCH] amd: alive wgps (#14149) * amd: disabled wgps * l * wgp * uoops * mockgpu * drm * ad this * fi * reg --- .github/workflows/autogen.yml | 4 +- extra/hip_gpu_driver/amdgpu_drm.h | 1740 ++++++++++++++++++++++++ test/mockgpu/amd/amddriver.py | 11 + tinygrad/runtime/autogen/__init__.py | 1 + tinygrad/runtime/autogen/amdgpu_drm.py | 1593 ++++++++++++++++++++++ tinygrad/runtime/ops_amd.py | 14 +- 6 files changed, 3359 insertions(+), 4 deletions(-) create mode 100644 extra/hip_gpu_driver/amdgpu_drm.h create mode 100644 tinygrad/runtime/autogen/amdgpu_drm.py diff --git a/.github/workflows/autogen.yml b/.github/workflows/autogen.yml index 874ea0eb14..486c2b5e35 100644 --- a/.github/workflows/autogen.yml +++ b/.github/workflows/autogen.yml @@ -40,13 +40,13 @@ jobs: mesa: 'true' pydeps: 'pyyaml mako' - name: Install autogen support packages - run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev + run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev libdrm-dev - name: Regenerate autogen files run: | find tinygrad/runtime/autogen -type f -name "*.py" -not -name "__init__.py" -not -name "comgr_3.py" -not -name "metal.py" -not -name "iokit.py" -not -name "corefoundation.py" -not -name "libclang.py" -delete python3 -c "from tinygrad.runtime.autogen import opencl" python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_570, nv_580, nv" - python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd" + python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd, amdgpu_drm" python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v13_0_6, smu_v14_0_2" python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio" python3 -c "from tinygrad.runtime.autogen import llvm" diff --git a/extra/hip_gpu_driver/amdgpu_drm.h b/extra/hip_gpu_driver/amdgpu_drm.h new file mode 100644 index 0000000000..f3223c05f7 --- /dev/null +++ b/extra/hip_gpu_driver/amdgpu_drm.h @@ -0,0 +1,1740 @@ +/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. Martin + * Gareth Hughes + * Keith Whitwell + */ + +#ifndef __AMDGPU_DRM_H__ +#define __AMDGPU_DRM_H__ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_AMDGPU_GEM_CREATE 0x00 +#define DRM_AMDGPU_GEM_MMAP 0x01 +#define DRM_AMDGPU_CTX 0x02 +#define DRM_AMDGPU_BO_LIST 0x03 +#define DRM_AMDGPU_CS 0x04 +#define DRM_AMDGPU_INFO 0x05 +#define DRM_AMDGPU_GEM_METADATA 0x06 +#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 +#define DRM_AMDGPU_GEM_VA 0x08 +#define DRM_AMDGPU_WAIT_CS 0x09 +#define DRM_AMDGPU_GEM_OP 0x10 +#define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 +/* not upstream */ +#define DRM_AMDGPU_GEM_DGMA 0x5c + +/* hybrid specific ioctls */ +#define DRM_AMDGPU_SEM 0x5b + +#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) +#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) +#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) +#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) +#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) +#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) +#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) +#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) +#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va) +#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) +#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) +#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) + +#define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) + +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. + * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allows GPU access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. + */ +/* hybrid specific ioctls */ +#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) + +#define AMDGPU_GEM_DOMAIN_CPU 0x1 +#define AMDGPU_GEM_DOMAIN_GTT 0x2 +#define AMDGPU_GEM_DOMAIN_VRAM 0x4 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 +#define AMDGPU_GEM_DOMAIN_GWS 0x10 +#define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_DGMA 0x400 +#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT 0x800 +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA |\ + AMDGPU_GEM_DOMAIN_DOORBELL |\ + AMDGPU_GEM_DOMAIN_DGMA |\ + AMDGPU_GEM_DOMAIN_DGMA_IMPORT) + +/* Flag that CPU access will be required for the case of VRAM domain */ +#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) +/* Flag that CPU access will not work, this VRAM domain is invisible */ +#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) +/* Flag that USWC attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. It should never + * be used by user space applications. + */ +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) +/* Flag that BO may contain sensitive data that must be wiped before + * releasing the memory + */ +#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) + +/* hybrid specific */ +/* Flag that the memory should be in SPARSE resource */ +#define AMDGPU_GEM_CREATE_SPARSE (1ULL << 29) +/* Flag that the memory allocation should be from top of domain */ +#define AMDGPU_GEM_CREATE_TOP_DOWN (1ULL << 30) +/* Flag that the memory allocation should be pinned */ +#define AMDGPU_GEM_CREATE_NO_EVICT (1ULL << 31) + +struct drm_amdgpu_gem_create_in { + /** the requested memory size */ + __u64 bo_size; + /** physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; + /** the requested memory domains */ + __u64 domains; + /** allocation flags */ + __u64 domain_flags; +}; + +struct drm_amdgpu_gem_create_out { + /** returned GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +union drm_amdgpu_gem_create { + struct drm_amdgpu_gem_create_in in; + struct drm_amdgpu_gem_create_out out; +}; + +/** Opcode to create new residency list. */ +#define AMDGPU_BO_LIST_OP_CREATE 0 +/** Opcode to destroy previously created residency list */ +#define AMDGPU_BO_LIST_OP_DESTROY 1 +/** Opcode to update resource information in the list */ +#define AMDGPU_BO_LIST_OP_UPDATE 2 + +struct drm_amdgpu_bo_list_in { + /** Type of operation */ + __u32 operation; + /** Handle of list or 0 if we want to create one */ + __u32 list_handle; + /** Number of BOs in list */ + __u32 bo_number; + /** Size of each element describing BO */ + __u32 bo_info_size; + /** Pointer to array describing BOs */ + __u64 bo_info_ptr; +}; + +struct drm_amdgpu_bo_list_entry { + /** Handle of BO */ + __u32 bo_handle; + /** New (if specified) BO priority to be used during migration */ + __u32 bo_priority; +}; + +struct drm_amdgpu_bo_list_out { + /** Handle of resource list */ + __u32 list_handle; + __u32 _pad; +}; + +union drm_amdgpu_bo_list { + struct drm_amdgpu_bo_list_in in; + struct drm_amdgpu_bo_list_out out; +}; + +/* context related */ +#define AMDGPU_CTX_OP_ALLOC_CTX 1 +#define AMDGPU_CTX_OP_FREE_CTX 2 +#define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 + +/* GPU reset status */ +#define AMDGPU_CTX_NO_RESET 0 +/* this the context caused it */ +#define AMDGPU_CTX_GUILTY_RESET 1 +/* some other context caused it */ +#define AMDGPU_CTX_INNOCENT_RESET 2 +/* unknown cause */ +#define AMDGPU_CTX_UNKNOWN_RESET 3 + +/* indicate gpu reset occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) +/* indicate vram lost occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) + +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* + * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires + * CAP_SYS_NICE or DRM_MASTER +*/ +#define AMDGPU_CTX_PRIORITY_HIGH 512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 + +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + +struct drm_amdgpu_ctx_in { + /** AMDGPU_CTX_OP_* */ + __u32 op; + /** Flags */ + __u32 flags; + __u32 ctx_id; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; +}; + +union drm_amdgpu_ctx_out { + struct { + __u32 ctx_id; + __u32 _pad; + } alloc; + + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** Number of resets caused by this context so far. */ + __u32 hangs; + /** Reset status since the last call of the ioctl. */ + __u32 reset_status; + } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; +}; + +union drm_amdgpu_ctx { + struct drm_amdgpu_ctx_in in; + union drm_amdgpu_ctx_out out; +}; + +/* user queue IOCTL operations */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) + +/* + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue id passed for operation USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated with this userqueue client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds IP specific MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). + */ + __u64 mqd_size; +}; + +/* The structure to carry output of userqueue ops */ +struct drm_amdgpu_userq_out { + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. + */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. + */ + __u64 out_fences; +}; + +/* sem related */ +#define AMDGPU_SEM_OP_CREATE_SEM 1 +#define AMDGPU_SEM_OP_WAIT_SEM 2 +#define AMDGPU_SEM_OP_SIGNAL_SEM 3 +#define AMDGPU_SEM_OP_DESTROY_SEM 4 +#define AMDGPU_SEM_OP_IMPORT_SEM 5 +#define AMDGPU_SEM_OP_EXPORT_SEM 6 + +struct drm_amdgpu_sem_in { + /** AMDGPU_SEM_OP_* */ + uint32_t op; + uint32_t handle; + uint32_t ctx_id; + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + uint64_t seq; +}; + +union drm_amdgpu_sem_out { + int32_t fd; + uint32_t handle; +}; + +union drm_amdgpu_sem { + struct drm_amdgpu_sem_in in; + union drm_amdgpu_sem_out out; +}; + +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID 2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; + __u32 ctx_id; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. + */ +#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) +#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) +#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) +#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_amdgpu_gem_userptr { + __u64 addr; + __u64 size; + /* AMDGPU_GEM_USERPTR_* */ + __u32 flags; + /* Resulting GEM handle */ + __u32 handle; +}; + +#define AMDGPU_GEM_DGMA_IMPORT 0 +#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR 1 +struct drm_amdgpu_gem_dgma { + __u64 addr; + __u64 size; + __u32 op; + __u32 handle; +}; + +/* SI-CI-VI: */ +/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ +#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 +#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf +#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 +#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f +#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 +#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 +#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 +#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 +#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 +#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 +#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 +#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 +#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 +#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 + +/* GFX9 - GFX11: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 + +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 + +/* Set/Get helpers for tiling flags. */ +#define AMDGPU_TILING_SET(field, value) \ + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) +#define AMDGPU_TILING_GET(value, field) \ + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + +#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 +#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 + +/** The same structure is shared for input/output */ +struct drm_amdgpu_gem_metadata { + /** GEM Object handle */ + __u32 handle; + /** Do we want get or set metadata */ + __u32 op; + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** family specific tiling info */ + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; + } data; +}; + +struct drm_amdgpu_gem_mmap_in { + /** the GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +struct drm_amdgpu_gem_mmap_out { + /** mmap offset from the vma offset manager */ + __u64 addr_ptr; +}; + +union drm_amdgpu_gem_mmap { + struct drm_amdgpu_gem_mmap_in in; + struct drm_amdgpu_gem_mmap_out out; +}; + +struct drm_amdgpu_gem_wait_idle_in { + /** GEM object handle */ + __u32 handle; + /** For future use, no flags defined so far */ + __u32 flags; + /** Absolute timeout to wait */ + __u64 timeout; +}; + +struct drm_amdgpu_gem_wait_idle_out { + /** BO status: 0 - BO is idle, 1 - BO is busy */ + __u32 status; + /** Returned current memory domain */ + __u32 domain; +}; + +union drm_amdgpu_gem_wait_idle { + struct drm_amdgpu_gem_wait_idle_in in; + struct drm_amdgpu_gem_wait_idle_out out; +}; + +struct drm_amdgpu_wait_cs_in { + /* Command submission handle + * handle equals 0 means none to wait for + * handle equals ~0ull means wait for the latest sequence number + */ + __u64 handle; + /** Absolute timeout to wait */ + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; +}; + +struct drm_amdgpu_wait_cs_out { + /** CS status: 0 - CS completed, 1 - CS still busy */ + __u64 status; +}; + +union drm_amdgpu_wait_cs { + struct drm_amdgpu_wait_cs_in in; + struct drm_amdgpu_wait_cs_out out; +}; + +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + +#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 +#define AMDGPU_GEM_OP_SET_PLACEMENT 1 + +/* Sets or returns a value associated with a buffer. */ +struct drm_amdgpu_gem_op { + /** GEM object handle */ + __u32 handle; + /** AMDGPU_GEM_OP_* */ + __u32 op; + /** Input or return value */ + __u64 value; +}; + +#define AMDGPU_VA_OP_MAP 1 +#define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 + +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + +/* Mapping flags */ +/* readable mapping */ +#define AMDGPU_VM_PAGE_READABLE (1 << 1) +/* writable mapping */ +#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) +/* executable mapping, new for VI */ +#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use Non Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use Write Combine MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use Cache Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UnCached MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use Read Write MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) + +struct drm_amdgpu_gem_va { + /** GEM object handle */ + __u32 handle; + __u32 _pad; + /** AMDGPU_VA_OP_* */ + __u32 operation; + /** AMDGPU_VM_PAGE_* */ + __u32 flags; + /** va address to assign . Must be correctly aligned.*/ + __u64 va_address; + /** Specify offset inside of BO to assign. Must be correctly aligned.*/ + __u64 offset_in_bo; + /** Specify mapping size. Must be correctly aligned. */ + __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; +}; + +#define AMDGPU_HW_IP_GFX 0 +#define AMDGPU_HW_IP_COMPUTE 1 +#define AMDGPU_HW_IP_DMA 2 +#define AMDGPU_HW_IP_UVD 3 +#define AMDGPU_HW_IP_VCE 4 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. + */ +#define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 + +#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 + +#define AMDGPU_CHUNK_ID_IB 0x01 +#define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a + +struct drm_amdgpu_cs_chunk { + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; +}; + +struct drm_amdgpu_cs_in { + /** Rendering context id */ + __u32 ctx_id; + /** Handle of resource list associated with CS */ + __u32 bo_list_handle; + __u32 num_chunks; + __u32 flags; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; +}; + +struct drm_amdgpu_cs_out { + __u64 handle; +}; + +union drm_amdgpu_cs { + struct drm_amdgpu_cs_in in; + struct drm_amdgpu_cs_out out; +}; + +/* Specify flags to be used for IB */ + +/* This IB should be submitted to CE */ +#define AMDGPU_IB_FLAG_CE (1<<0) + +/* Preamble flag, which means the IB could be dropped if no context switch */ +#define AMDGPU_IB_FLAG_PREAMBLE (1<<1) + +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1<<2) + +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + +struct drm_amdgpu_cs_chunk_ib { + __u32 _pad; + /** AMDGPU_IB_FLAG_* */ + __u32 flags; + /** Virtual address to begin IB execution */ + __u64 va_start; + /** Size of submission */ + __u32 ib_bytes; + /** HW IP to submit to */ + __u32 ip_type; + /** HW IP index of the same type to submit to */ + __u32 ip_instance; + /** Ring index to submit to */ + __u32 ring; +}; + +struct drm_amdgpu_cs_chunk_dep { + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; +}; + +struct drm_amdgpu_cs_chunk_fence { + __u32 handle; + __u32 offset; +}; + +struct drm_amdgpu_cs_chunk_sem { + __u32 handle; +}; + +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + __u32 pad; + } in; + struct { + __u32 handle; + } out; +}; + +struct drm_amdgpu_cs_chunk_data { + union { + struct drm_amdgpu_cs_chunk_ib ib_data; + struct drm_amdgpu_cs_chunk_fence fence_data; + }; +}; + +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + __u64 flags; +}; + +/* + * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU + * + */ +#define AMDGPU_IDS_FLAGS_FUSION 0x1 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 +#define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 + +/* + * Query h/w info: Flag identifying VF/PF/PT mode + * + */ +#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 +#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 +#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 +#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 +#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 + +/* indicate if acceleration can be working */ +#define AMDGPU_INFO_ACCEL_WORKING 0x00 +/* get the crtc_id from the mode object id? */ +#define AMDGPU_INFO_CRTC_FROM_ID 0x01 +/* query hw IP info */ +#define AMDGPU_INFO_HW_IP_INFO 0x02 +/* query hw IP instance count for the specified type */ +#define AMDGPU_INFO_HW_IP_COUNT 0x03 +/* timestamp for GL_ARB_timer_query */ +#define AMDGPU_INFO_TIMESTAMP 0x05 +/* Query the firmware version */ +#define AMDGPU_INFO_FW_VERSION 0x0e + /* Subquery id: Query VCE firmware version */ + #define AMDGPU_INFO_FW_VCE 0x1 + /* Subquery id: Query UVD firmware version */ + #define AMDGPU_INFO_FW_UVD 0x2 + /* Subquery id: Query GMC firmware version */ + #define AMDGPU_INFO_FW_GMC 0x03 + /* Subquery id: Query GFX ME firmware version */ + #define AMDGPU_INFO_FW_GFX_ME 0x04 + /* Subquery id: Query GFX PFP firmware version */ + #define AMDGPU_INFO_FW_GFX_PFP 0x05 + /* Subquery id: Query GFX CE firmware version */ + #define AMDGPU_INFO_FW_GFX_CE 0x06 + /* Subquery id: Query GFX RLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC 0x07 + /* Subquery id: Query GFX MEC firmware version */ + #define AMDGPU_INFO_FW_GFX_MEC 0x08 + /* Subquery id: Query SMC firmware version */ + #define AMDGPU_INFO_FW_SMC 0x0a + /* Subquery id: Query SDMA firmware version */ + #define AMDGPU_INFO_FW_SDMA 0x0b + /* Subquery id: Query PSP SOS firmware version */ + #define AMDGPU_INFO_FW_SOS 0x0c + /* Subquery id: Query PSP ASD firmware version */ + #define AMDGPU_INFO_FW_ASD 0x0d + /* Subquery id: Query VCN firmware version */ + #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 + /* Subquery id: Query DMCU firmware version */ + #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 + /* Subquery id: Query DMCUB firmware version */ + #define AMDGPU_INFO_FW_DMCUB 0x14 + /* Subquery id: Query TOC firmware version */ + #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 + /* Subquery id: Query GFX RLCP firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCP 0x17 + /* Subquery id: Query GFX RLCV firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCV 0x18 + /* Subquery id: Query MES_KIQ firmware version */ + #define AMDGPU_INFO_FW_MES_KIQ 0x19 + /* Subquery id: Query MES firmware version */ + #define AMDGPU_INFO_FW_MES 0x1a + /* Subquery id: Query IMU firmware version */ + #define AMDGPU_INFO_FW_IMU 0x1b + /* Subquery id: Query VPE firmware version */ + #define AMDGPU_INFO_FW_VPE 0x1c + +/* number of bytes moved for TTM migration */ +#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f +/* the used VRAM size */ +#define AMDGPU_INFO_VRAM_USAGE 0x10 +/* the used GTT size */ +#define AMDGPU_INFO_GTT_USAGE 0x11 +/* Information about GDS, etc. resource configuration */ +#define AMDGPU_INFO_GDS_CONFIG 0x13 +/* Query information about VRAM and GTT domains */ +#define AMDGPU_INFO_VRAM_GTT 0x14 +/* Query information about register in MMR address space*/ +#define AMDGPU_INFO_READ_MMR_REG 0x15 +/* Query information about device: rev id, family, etc. */ +#define AMDGPU_INFO_DEV_INFO 0x16 +/* visible vram usage */ +#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 +/* Query memory about VRAM and GTT domains */ +#define AMDGPU_INFO_MEMORY 0x19 +/* Query vce clock table */ +#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A +/* Query vbios related information */ +#define AMDGPU_INFO_VBIOS 0x1B + /* Subquery id: Query vbios size */ + #define AMDGPU_INFO_VBIOS_SIZE 0x1 + /* Subquery id: Query vbios image */ + #define AMDGPU_INFO_VBIOS_IMAGE 0x2 + /* Subquery id: Query vbios info */ + #define AMDGPU_INFO_VBIOS_INFO 0x3 +/* Query UVD handles */ +#define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D + /* Subquery id: Query GPU shader clock */ + #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 + /* Subquery id: Query GPU memory clock */ + #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 + /* Subquery id: Query GPU temperature */ + #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 + /* Subquery id: Query GPU load */ + #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 + /* Subquery id: Query average GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 + /* Subquery id: Query northbridge voltage */ + #define AMDGPU_INFO_SENSOR_VDDNB 0x6 + /* Subquery id: Query graphics voltage */ + #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 + /* Subquery id: Query GPU stable pstate shader clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 + /* Subquery id: Query GPU stable pstate memory clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb + /* Subquery id: Query input GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc +/* Number of VRAM page faults on CPU access. */ +#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 + /* Subquery id: Decode */ + #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 + /* Subquery id: Encode */ + #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 + +/* Hybrid Stack Specific Defs*/ +/* gpu capability */ +#define AMDGPU_INFO_CAPABILITY 0x50 +/* virtual range */ +#define AMDGPU_INFO_VIRTUAL_RANGE 0x51 +/* query pin memory capability */ +#define AMDGPU_CAPABILITY_PIN_MEM_FLAG (1 << 0) +/* query direct gma capability */ +#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG (1 << 1) + +#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 +#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 +#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff + +struct drm_amdgpu_query_fw { + /** AMDGPU_INFO_FW_* */ + __u32 fw_type; + /** + * Index of the IP if there are more IPs of + * the same type. + */ + __u32 ip_instance; + /** + * Index of the engine. Whether this is used depends + * on the firmware type. (e.g. MEC, SDMA) + */ + __u32 index; + __u32 _pad; +}; + +/* Input structure for the INFO ioctl */ +struct drm_amdgpu_info { + /* Where the return value will be stored */ + __u64 return_pointer; + /* The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write. */ + __u32 return_size; + /* The query request id. */ + __u32 query; + + union { + struct { + __u32 id; + __u32 _pad; + } mode_crtc; + + struct { + /** AMDGPU_HW_IP_* */ + __u32 type; + /** + * Index of the IP if there are more IPs of the same + * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. + */ + __u32 ip_instance; + } query_hw_ip; + + struct { + __u32 dword_offset; + /** number of registers to read */ + __u32 count; + __u32 instance; + /** For future use, no flags defined so far */ + __u32 flags; + } read_mmr_reg; + + struct { + uint32_t aperture; + uint32_t _pad; + } virtual_range; + + struct drm_amdgpu_query_fw query_fw; + + struct { + __u32 type; + __u32 offset; + } vbios_info; + + struct { + __u32 type; + } sensor_info; + + struct { + __u32 type; + } video_cap; + }; +}; + +struct drm_amdgpu_info_gds { + /** GDS GFX partition size */ + __u32 gds_gfx_partition_size; + /** GDS compute partition size */ + __u32 compute_partition_size; + /** total GDS memory size */ + __u32 gds_total_size; + /** GWS size per GFX partition */ + __u32 gws_per_gfx_partition; + /** GSW size per compute partition */ + __u32 gws_per_compute_partition; + /** OA size per GFX partition */ + __u32 oa_per_gfx_partition; + /** OA size per compute partition */ + __u32 oa_per_compute_partition; + __u32 _pad; +}; + +struct drm_amdgpu_info_vram_gtt { + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; +}; + +struct drm_amdgpu_heap_info { + /** max. physical memory */ + __u64 total_heap_size; + + /** Theoretical max. available memory in the given heap */ + __u64 usable_heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + __u64 heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + __u64 max_allocation; +}; + +struct drm_amdgpu_memory_info { + struct drm_amdgpu_heap_info vram; + struct drm_amdgpu_heap_info cpu_accessible_vram; + struct drm_amdgpu_heap_info gtt; +}; + +struct drm_amdgpu_info_firmware { + __u32 ver; + __u32 feature; +}; + +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + +#define AMDGPU_VRAM_TYPE_UNKNOWN 0 +#define AMDGPU_VRAM_TYPE_GDDR1 1 +#define AMDGPU_VRAM_TYPE_DDR2 2 +#define AMDGPU_VRAM_TYPE_GDDR3 3 +#define AMDGPU_VRAM_TYPE_GDDR4 4 +#define AMDGPU_VRAM_TYPE_GDDR5 5 +#define AMDGPU_VRAM_TYPE_HBM 6 +#define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 +#define AMDGPU_VRAM_TYPE_HBM3E 13 + +#define AMDGPU_VRAM_TYPE_HBM_WIDTH 4096 + +struct drm_amdgpu_info_device { + /** PCI Device ID */ + __u32 device_id; + /** Internal chip revision: A0, A1, etc.) */ + __u32 chip_rev; + __u32 external_rev; + /** Revision id in PCI Config space */ + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; + /* in KHz */ + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; + /* cu information */ + __u32 cu_active_number; + /* NOTE: cu_ao_mask is INVALID, DON'T use it */ + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; + /** Render backend pipe mask. One render backend is CB+DB. */ + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; + __u64 ids_flags; + /** Starting virtual address for UMDs. */ + __u64 virtual_address_offset; + /** The maximum virtual address */ + __u64 virtual_address_max; + /** Required alignment of virtual addresses. */ + __u32 virtual_address_alignment; + /** Page table entry - fragment size */ + __u32 pte_fragment_size; + __u32 gart_page_size; + /** constant engine ram size*/ + __u32 ce_ram_size; + /** video memory type info*/ + __u32 vram_type; + /** video memory bit width*/ + __u32 vram_bit_width; + /* vce harvesting instance */ + __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; + /* always on cu bitmap */ + __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; + /* gfx10 pa_sc_tile_steering_override */ + __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; +}; + +struct drm_amdgpu_info_hw_ip { + /** Version of h/w IP */ + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; + /** Capabilities */ + __u64 capabilities_flags; + /** command buffer address start alignment*/ + __u32 ib_start_alignment; + /** command buffer size alignment*/ + __u32 ib_size_alignment; + /** Bitmask of available rings. Bit 0 means ring 0, etc. */ + __u32 available_rings; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; +}; + +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + +struct drm_amdgpu_info_num_handles { + /** Max handles as supported by firmware for UVD */ + __u32 uvd_max_handles; + /** Handles currently in use for UVD */ + __u32 uvd_used_handles; +}; + +#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 + +struct drm_amdgpu_info_vce_clock_table_entry { + /** System clock */ + __u32 sclk; + /** Memory clock */ + __u32 mclk; + /** VCE clock */ + __u32 eclk; + __u32 pad; +}; + +struct drm_amdgpu_info_vce_clock_table { + struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; + __u32 num_valid_entries; + __u32 pad; +}; + +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 + +struct drm_amdgpu_info_video_codec_info { + __u32 valid; + __u32 max_width; + __u32 max_height; + __u32 max_pixels_per_frame; + __u32 max_level; + __u32 pad; +}; + +struct drm_amdgpu_info_video_caps { + struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; +}; + +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + +/* + * Supported GPU families + */ +#define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ +#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ +#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ +#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ +#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ +#define AMDGPU_FAMILY_RV 142 /* Raven */ +#define AMDGPU_FAMILY_NV 143 /* Navi10 */ +#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ +#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ + +#ifndef HAVE_DRM_COLOR_CTM_3X4 +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; +#endif + +/** + * Definition of System Unified Address (SUA) apertures + */ +#define AMDGPU_SUA_APERTURE_PRIVATE 1 +#define AMDGPU_SUA_APERTURE_SHARED 2 +struct drm_amdgpu_virtual_range { + uint64_t start; + uint64_t end; +}; + +struct drm_amdgpu_capability { + __u32 flag; + __u32 direct_gma_size; +}; + +/* + * Definition of free sync enter and exit signals + * We may have more options in the future + */ +#define AMDGPU_FREESYNC_FULLSCREEN_ENTER 1 +#define AMDGPU_FREESYNC_FULLSCREEN_EXIT 2 + +struct drm_amdgpu_freesync { + __u32 op; /* AMDGPU_FREESYNC_FULLSCREEN_ENTER or */ + /* AMDGPU_FREESYNC_FULLSCREEN_ENTER */ + __u32 spare[7]; +}; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/test/mockgpu/amd/amddriver.py b/test/mockgpu/amd/amddriver.py index 29038a097a..0e498f1d05 100644 --- a/test/mockgpu/amd/amddriver.py +++ b/test/mockgpu/amd/amddriver.py @@ -1,6 +1,7 @@ import pathlib, re, ctypes, mmap, collections, functools, copy, os import tinygrad.runtime.autogen.kfd as kfd import tinygrad.runtime.autogen.am.am as am +import tinygrad.runtime.autogen.amdgpu_drm as amdgpu_drm from tinygrad.helpers import from_mv from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props @@ -33,6 +34,16 @@ class DRMFileDesc(VirtFileDesc): super().__init__(fd) self.driver, self.gpu = driver, gpu + def ioctl(self, fd, request, argp): + struct = amdgpu_drm.struct_drm_amdgpu_info.from_address(argp) + if struct.query == amdgpu_drm.AMDGPU_INFO_DEV_INFO: + dev_info = amdgpu_drm.struct_drm_amdgpu_info_device.from_address(struct.return_pointer) + # mock of gfx1100 + for se in range(4): + for sa in range(4): dev_info.cu_bitmap[se][sa] = 0xff if (se * 4 + sa) < 12 else 0 + return 0 + raise NotImplementedError(f"unknown DRM ioctl query {struct.query}") + def mmap(self, start, sz, prot, flags, fd, offset): return libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0) class AMDDriver(VirtDriver): diff --git a/tinygrad/runtime/autogen/__init__.py b/tinygrad/runtime/autogen/__init__.py index 9ad96bb33f..5c5174b335 100644 --- a/tinygrad/runtime/autogen/__init__.py +++ b/tinygrad/runtime/autogen/__init__.py @@ -105,6 +105,7 @@ def __getattr__(nm): case "amd_gpu": return load("amd_gpu", None, [root/f"extra/hip_gpu_driver/{s}.h" for s in ["sdma_registers", "nvd", "gc_11_0_0_offset", "sienna_cichlid_ip_offset"]], args=["-I/opt/rocm/include", "-x", "c++"]) + case "amdgpu_drm": return load("amdgpu_drm", None, [ "/usr/include/drm/drm.h", *[root/f"extra/hip_gpu_driver/{s}.h" for s in ["amdgpu_drm"]]]) case "kgsl": return load("kgsl", None, [root/"extra/qcom_gpu_driver/msm_kgsl.h"], args=["-D__user="]) case "qcom_dsp": return load("qcom_dsp", None, [root/f"extra/dsp/include/{s}.h" for s in ["ion", "msm_ion", "adsprpc_shared", "remote_default", "apps_std"]]) diff --git a/tinygrad/runtime/autogen/amdgpu_drm.py b/tinygrad/runtime/autogen/amdgpu_drm.py new file mode 100644 index 0000000000..6e967ed763 --- /dev/null +++ b/tinygrad/runtime/autogen/amdgpu_drm.py @@ -0,0 +1,1593 @@ +# mypy: disable-error-code="empty-body" +from __future__ import annotations +import ctypes +from typing import Annotated, Literal, TypeAlias +from tinygrad.runtime.support.c import _IO, _IOW, _IOR, _IOWR +from tinygrad.runtime.support import c +drm_handle_t: TypeAlias = Annotated[int, ctypes.c_uint32] +drm_context_t: TypeAlias = Annotated[int, ctypes.c_uint32] +drm_drawable_t: TypeAlias = Annotated[int, ctypes.c_uint32] +drm_magic_t: TypeAlias = Annotated[int, ctypes.c_uint32] +@c.record +class struct_drm_clip_rect(c.Struct): + SIZE = 8 + x1: Annotated[Annotated[int, ctypes.c_uint16], 0] + y1: Annotated[Annotated[int, ctypes.c_uint16], 2] + x2: Annotated[Annotated[int, ctypes.c_uint16], 4] + y2: Annotated[Annotated[int, ctypes.c_uint16], 6] +@c.record +class struct_drm_drawable_info(c.Struct): + SIZE = 16 + num_rects: Annotated[Annotated[int, ctypes.c_uint32], 0] + rects: Annotated[c.POINTER[struct_drm_clip_rect], 8] +@c.record +class struct_drm_tex_region(c.Struct): + SIZE = 8 + next: Annotated[Annotated[int, ctypes.c_ubyte], 0] + prev: Annotated[Annotated[int, ctypes.c_ubyte], 1] + in_use: Annotated[Annotated[int, ctypes.c_ubyte], 2] + padding: Annotated[Annotated[int, ctypes.c_ubyte], 3] + age: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_hw_lock(c.Struct): + SIZE = 64 + lock: Annotated[Annotated[int, ctypes.c_uint32], 0] + padding: Annotated[c.Array[Annotated[bytes, ctypes.c_char], Literal[60]], 4] +@c.record +class struct_drm_version(c.Struct): + SIZE = 64 + version_major: Annotated[Annotated[int, ctypes.c_int32], 0] + version_minor: Annotated[Annotated[int, ctypes.c_int32], 4] + version_patchlevel: Annotated[Annotated[int, ctypes.c_int32], 8] + name_len: Annotated[Annotated[int, ctypes.c_uint64], 16] + name: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 24] + date_len: Annotated[Annotated[int, ctypes.c_uint64], 32] + date: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 40] + desc_len: Annotated[Annotated[int, ctypes.c_uint64], 48] + desc: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 56] +__kernel_size_t: TypeAlias = Annotated[int, ctypes.c_uint64] +@c.record +class struct_drm_unique(c.Struct): + SIZE = 16 + unique_len: Annotated[Annotated[int, ctypes.c_uint64], 0] + unique: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 8] +@c.record +class struct_drm_list(c.Struct): + SIZE = 16 + count: Annotated[Annotated[int, ctypes.c_int32], 0] + version: Annotated[c.POINTER[struct_drm_version], 8] +@c.record +class struct_drm_block(c.Struct): + SIZE = 4 + unused: Annotated[Annotated[int, ctypes.c_int32], 0] +@c.record +class struct_drm_control(c.Struct): + SIZE = 8 + func: Annotated[struct_drm_control_func, 0] + irq: Annotated[Annotated[int, ctypes.c_int32], 4] +class struct_drm_control_func(Annotated[int, ctypes.c_uint32], c.Enum): pass +DRM_ADD_COMMAND = struct_drm_control_func.define('DRM_ADD_COMMAND', 0) +DRM_RM_COMMAND = struct_drm_control_func.define('DRM_RM_COMMAND', 1) +DRM_INST_HANDLER = struct_drm_control_func.define('DRM_INST_HANDLER', 2) +DRM_UNINST_HANDLER = struct_drm_control_func.define('DRM_UNINST_HANDLER', 3) + +class enum_drm_map_type(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_FRAME_BUFFER = enum_drm_map_type.define('_DRM_FRAME_BUFFER', 0) +_DRM_REGISTERS = enum_drm_map_type.define('_DRM_REGISTERS', 1) +_DRM_SHM = enum_drm_map_type.define('_DRM_SHM', 2) +_DRM_AGP = enum_drm_map_type.define('_DRM_AGP', 3) +_DRM_SCATTER_GATHER = enum_drm_map_type.define('_DRM_SCATTER_GATHER', 4) +_DRM_CONSISTENT = enum_drm_map_type.define('_DRM_CONSISTENT', 5) + +class enum_drm_map_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_RESTRICTED = enum_drm_map_flags.define('_DRM_RESTRICTED', 1) +_DRM_READ_ONLY = enum_drm_map_flags.define('_DRM_READ_ONLY', 2) +_DRM_LOCKED = enum_drm_map_flags.define('_DRM_LOCKED', 4) +_DRM_KERNEL = enum_drm_map_flags.define('_DRM_KERNEL', 8) +_DRM_WRITE_COMBINING = enum_drm_map_flags.define('_DRM_WRITE_COMBINING', 16) +_DRM_CONTAINS_LOCK = enum_drm_map_flags.define('_DRM_CONTAINS_LOCK', 32) +_DRM_REMOVABLE = enum_drm_map_flags.define('_DRM_REMOVABLE', 64) +_DRM_DRIVER = enum_drm_map_flags.define('_DRM_DRIVER', 128) + +@c.record +class struct_drm_ctx_priv_map(c.Struct): + SIZE = 16 + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + handle: Annotated[ctypes.c_void_p, 8] +@c.record +class struct_drm_map(c.Struct): + SIZE = 40 + offset: Annotated[Annotated[int, ctypes.c_uint64], 0] + size: Annotated[Annotated[int, ctypes.c_uint64], 8] + type: Annotated[enum_drm_map_type, 16] + flags: Annotated[enum_drm_map_flags, 20] + handle: Annotated[ctypes.c_void_p, 24] + mtrr: Annotated[Annotated[int, ctypes.c_int32], 32] +@c.record +class struct_drm_client(c.Struct): + SIZE = 40 + idx: Annotated[Annotated[int, ctypes.c_int32], 0] + auth: Annotated[Annotated[int, ctypes.c_int32], 4] + pid: Annotated[Annotated[int, ctypes.c_uint64], 8] + uid: Annotated[Annotated[int, ctypes.c_uint64], 16] + magic: Annotated[Annotated[int, ctypes.c_uint64], 24] + iocs: Annotated[Annotated[int, ctypes.c_uint64], 32] +class enum_drm_stat_type(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_STAT_LOCK = enum_drm_stat_type.define('_DRM_STAT_LOCK', 0) +_DRM_STAT_OPENS = enum_drm_stat_type.define('_DRM_STAT_OPENS', 1) +_DRM_STAT_CLOSES = enum_drm_stat_type.define('_DRM_STAT_CLOSES', 2) +_DRM_STAT_IOCTLS = enum_drm_stat_type.define('_DRM_STAT_IOCTLS', 3) +_DRM_STAT_LOCKS = enum_drm_stat_type.define('_DRM_STAT_LOCKS', 4) +_DRM_STAT_UNLOCKS = enum_drm_stat_type.define('_DRM_STAT_UNLOCKS', 5) +_DRM_STAT_VALUE = enum_drm_stat_type.define('_DRM_STAT_VALUE', 6) +_DRM_STAT_BYTE = enum_drm_stat_type.define('_DRM_STAT_BYTE', 7) +_DRM_STAT_COUNT = enum_drm_stat_type.define('_DRM_STAT_COUNT', 8) +_DRM_STAT_IRQ = enum_drm_stat_type.define('_DRM_STAT_IRQ', 9) +_DRM_STAT_PRIMARY = enum_drm_stat_type.define('_DRM_STAT_PRIMARY', 10) +_DRM_STAT_SECONDARY = enum_drm_stat_type.define('_DRM_STAT_SECONDARY', 11) +_DRM_STAT_DMA = enum_drm_stat_type.define('_DRM_STAT_DMA', 12) +_DRM_STAT_SPECIAL = enum_drm_stat_type.define('_DRM_STAT_SPECIAL', 13) +_DRM_STAT_MISSED = enum_drm_stat_type.define('_DRM_STAT_MISSED', 14) + +@c.record +class struct_drm_stats(c.Struct): + SIZE = 248 + count: Annotated[Annotated[int, ctypes.c_uint64], 0] + data: Annotated[c.Array[struct_drm_stats_data, Literal[15]], 8] +@c.record +class struct_drm_stats_data(c.Struct): + SIZE = 16 + value: Annotated[Annotated[int, ctypes.c_uint64], 0] + type: Annotated[enum_drm_stat_type, 8] +class enum_drm_lock_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_LOCK_READY = enum_drm_lock_flags.define('_DRM_LOCK_READY', 1) +_DRM_LOCK_QUIESCENT = enum_drm_lock_flags.define('_DRM_LOCK_QUIESCENT', 2) +_DRM_LOCK_FLUSH = enum_drm_lock_flags.define('_DRM_LOCK_FLUSH', 4) +_DRM_LOCK_FLUSH_ALL = enum_drm_lock_flags.define('_DRM_LOCK_FLUSH_ALL', 8) +_DRM_HALT_ALL_QUEUES = enum_drm_lock_flags.define('_DRM_HALT_ALL_QUEUES', 16) +_DRM_HALT_CUR_QUEUES = enum_drm_lock_flags.define('_DRM_HALT_CUR_QUEUES', 32) + +@c.record +class struct_drm_lock(c.Struct): + SIZE = 8 + context: Annotated[Annotated[int, ctypes.c_int32], 0] + flags: Annotated[enum_drm_lock_flags, 4] +class enum_drm_dma_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_DMA_BLOCK = enum_drm_dma_flags.define('_DRM_DMA_BLOCK', 1) +_DRM_DMA_WHILE_LOCKED = enum_drm_dma_flags.define('_DRM_DMA_WHILE_LOCKED', 2) +_DRM_DMA_PRIORITY = enum_drm_dma_flags.define('_DRM_DMA_PRIORITY', 4) +_DRM_DMA_WAIT = enum_drm_dma_flags.define('_DRM_DMA_WAIT', 16) +_DRM_DMA_SMALLER_OK = enum_drm_dma_flags.define('_DRM_DMA_SMALLER_OK', 32) +_DRM_DMA_LARGER_OK = enum_drm_dma_flags.define('_DRM_DMA_LARGER_OK', 64) + +@c.record +class struct_drm_buf_desc(c.Struct): + SIZE = 32 + count: Annotated[Annotated[int, ctypes.c_int32], 0] + size: Annotated[Annotated[int, ctypes.c_int32], 4] + low_mark: Annotated[Annotated[int, ctypes.c_int32], 8] + high_mark: Annotated[Annotated[int, ctypes.c_int32], 12] + flags: Annotated[struct_drm_buf_desc_flags, 16] + agp_start: Annotated[Annotated[int, ctypes.c_uint64], 24] +class struct_drm_buf_desc_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_PAGE_ALIGN = struct_drm_buf_desc_flags.define('_DRM_PAGE_ALIGN', 1) +_DRM_AGP_BUFFER = struct_drm_buf_desc_flags.define('_DRM_AGP_BUFFER', 2) +_DRM_SG_BUFFER = struct_drm_buf_desc_flags.define('_DRM_SG_BUFFER', 4) +_DRM_FB_BUFFER = struct_drm_buf_desc_flags.define('_DRM_FB_BUFFER', 8) +_DRM_PCI_BUFFER_RO = struct_drm_buf_desc_flags.define('_DRM_PCI_BUFFER_RO', 16) + +@c.record +class struct_drm_buf_info(c.Struct): + SIZE = 16 + count: Annotated[Annotated[int, ctypes.c_int32], 0] + list: Annotated[c.POINTER[struct_drm_buf_desc], 8] +@c.record +class struct_drm_buf_free(c.Struct): + SIZE = 16 + count: Annotated[Annotated[int, ctypes.c_int32], 0] + list: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 8] +@c.record +class struct_drm_buf_pub(c.Struct): + SIZE = 24 + idx: Annotated[Annotated[int, ctypes.c_int32], 0] + total: Annotated[Annotated[int, ctypes.c_int32], 4] + used: Annotated[Annotated[int, ctypes.c_int32], 8] + address: Annotated[ctypes.c_void_p, 16] +@c.record +class struct_drm_buf_map(c.Struct): + SIZE = 24 + count: Annotated[Annotated[int, ctypes.c_int32], 0] + virtual: Annotated[ctypes.c_void_p, 8] + list: Annotated[c.POINTER[struct_drm_buf_pub], 16] +@c.record +class struct_drm_dma(c.Struct): + SIZE = 64 + context: Annotated[Annotated[int, ctypes.c_int32], 0] + send_count: Annotated[Annotated[int, ctypes.c_int32], 4] + send_indices: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 8] + send_sizes: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 16] + flags: Annotated[enum_drm_dma_flags, 24] + request_count: Annotated[Annotated[int, ctypes.c_int32], 28] + request_size: Annotated[Annotated[int, ctypes.c_int32], 32] + request_indices: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 40] + request_sizes: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 48] + granted_count: Annotated[Annotated[int, ctypes.c_int32], 56] +class enum_drm_ctx_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_CONTEXT_PRESERVED = enum_drm_ctx_flags.define('_DRM_CONTEXT_PRESERVED', 1) +_DRM_CONTEXT_2DONLY = enum_drm_ctx_flags.define('_DRM_CONTEXT_2DONLY', 2) + +@c.record +class struct_drm_ctx(c.Struct): + SIZE = 8 + handle: Annotated[drm_context_t, 0] + flags: Annotated[enum_drm_ctx_flags, 4] +@c.record +class struct_drm_ctx_res(c.Struct): + SIZE = 16 + count: Annotated[Annotated[int, ctypes.c_int32], 0] + contexts: Annotated[c.POINTER[struct_drm_ctx], 8] +@c.record +class struct_drm_draw(c.Struct): + SIZE = 4 + handle: Annotated[drm_drawable_t, 0] +class drm_drawable_info_type_t(Annotated[int, ctypes.c_uint32], c.Enum): pass +DRM_DRAWABLE_CLIPRECTS = drm_drawable_info_type_t.define('DRM_DRAWABLE_CLIPRECTS', 0) + +@c.record +class struct_drm_update_draw(c.Struct): + SIZE = 24 + handle: Annotated[drm_drawable_t, 0] + type: Annotated[Annotated[int, ctypes.c_uint32], 4] + num: Annotated[Annotated[int, ctypes.c_uint32], 8] + data: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_auth(c.Struct): + SIZE = 4 + magic: Annotated[drm_magic_t, 0] +@c.record +class struct_drm_irq_busid(c.Struct): + SIZE = 16 + irq: Annotated[Annotated[int, ctypes.c_int32], 0] + busnum: Annotated[Annotated[int, ctypes.c_int32], 4] + devnum: Annotated[Annotated[int, ctypes.c_int32], 8] + funcnum: Annotated[Annotated[int, ctypes.c_int32], 12] +class enum_drm_vblank_seq_type(Annotated[int, ctypes.c_uint32], c.Enum): pass +_DRM_VBLANK_ABSOLUTE = enum_drm_vblank_seq_type.define('_DRM_VBLANK_ABSOLUTE', 0) +_DRM_VBLANK_RELATIVE = enum_drm_vblank_seq_type.define('_DRM_VBLANK_RELATIVE', 1) +_DRM_VBLANK_HIGH_CRTC_MASK = enum_drm_vblank_seq_type.define('_DRM_VBLANK_HIGH_CRTC_MASK', 62) +_DRM_VBLANK_EVENT = enum_drm_vblank_seq_type.define('_DRM_VBLANK_EVENT', 67108864) +_DRM_VBLANK_FLIP = enum_drm_vblank_seq_type.define('_DRM_VBLANK_FLIP', 134217728) +_DRM_VBLANK_NEXTONMISS = enum_drm_vblank_seq_type.define('_DRM_VBLANK_NEXTONMISS', 268435456) +_DRM_VBLANK_SECONDARY = enum_drm_vblank_seq_type.define('_DRM_VBLANK_SECONDARY', 536870912) +_DRM_VBLANK_SIGNAL = enum_drm_vblank_seq_type.define('_DRM_VBLANK_SIGNAL', 1073741824) + +@c.record +class struct_drm_wait_vblank_request(c.Struct): + SIZE = 16 + type: Annotated[enum_drm_vblank_seq_type, 0] + sequence: Annotated[Annotated[int, ctypes.c_uint32], 4] + signal: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_wait_vblank_reply(c.Struct): + SIZE = 24 + type: Annotated[enum_drm_vblank_seq_type, 0] + sequence: Annotated[Annotated[int, ctypes.c_uint32], 4] + tval_sec: Annotated[Annotated[int, ctypes.c_int64], 8] + tval_usec: Annotated[Annotated[int, ctypes.c_int64], 16] +@c.record +class union_drm_wait_vblank(c.Struct): + SIZE = 24 + request: Annotated[struct_drm_wait_vblank_request, 0] + reply: Annotated[struct_drm_wait_vblank_reply, 0] +@c.record +class struct_drm_modeset_ctl(c.Struct): + SIZE = 8 + crtc: Annotated[Annotated[int, ctypes.c_uint32], 0] + cmd: Annotated[Annotated[int, ctypes.c_uint32], 4] +__u32: TypeAlias = Annotated[int, ctypes.c_uint32] +@c.record +class struct_drm_agp_mode(c.Struct): + SIZE = 8 + mode: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class struct_drm_agp_buffer(c.Struct): + SIZE = 32 + size: Annotated[Annotated[int, ctypes.c_uint64], 0] + handle: Annotated[Annotated[int, ctypes.c_uint64], 8] + type: Annotated[Annotated[int, ctypes.c_uint64], 16] + physical: Annotated[Annotated[int, ctypes.c_uint64], 24] +@c.record +class struct_drm_agp_binding(c.Struct): + SIZE = 16 + handle: Annotated[Annotated[int, ctypes.c_uint64], 0] + offset: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_agp_info(c.Struct): + SIZE = 56 + agp_version_major: Annotated[Annotated[int, ctypes.c_int32], 0] + agp_version_minor: Annotated[Annotated[int, ctypes.c_int32], 4] + mode: Annotated[Annotated[int, ctypes.c_uint64], 8] + aperture_base: Annotated[Annotated[int, ctypes.c_uint64], 16] + aperture_size: Annotated[Annotated[int, ctypes.c_uint64], 24] + memory_allowed: Annotated[Annotated[int, ctypes.c_uint64], 32] + memory_used: Annotated[Annotated[int, ctypes.c_uint64], 40] + id_vendor: Annotated[Annotated[int, ctypes.c_uint16], 48] + id_device: Annotated[Annotated[int, ctypes.c_uint16], 50] +@c.record +class struct_drm_scatter_gather(c.Struct): + SIZE = 16 + size: Annotated[Annotated[int, ctypes.c_uint64], 0] + handle: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_set_version(c.Struct): + SIZE = 16 + drm_di_major: Annotated[Annotated[int, ctypes.c_int32], 0] + drm_di_minor: Annotated[Annotated[int, ctypes.c_int32], 4] + drm_dd_major: Annotated[Annotated[int, ctypes.c_int32], 8] + drm_dd_minor: Annotated[Annotated[int, ctypes.c_int32], 12] +@c.record +class struct_drm_gem_close(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_gem_flink(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + name: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_gem_open(c.Struct): + SIZE = 16 + name: Annotated[Annotated[int, ctypes.c_uint32], 0] + handle: Annotated[Annotated[int, ctypes.c_uint32], 4] + size: Annotated[Annotated[int, ctypes.c_uint64], 8] +__u64: TypeAlias = Annotated[int, ctypes.c_uint64] +@c.record +class struct_drm_get_cap(c.Struct): + SIZE = 16 + capability: Annotated[Annotated[int, ctypes.c_uint64], 0] + value: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_set_client_cap(c.Struct): + SIZE = 16 + capability: Annotated[Annotated[int, ctypes.c_uint64], 0] + value: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_prime_handle(c.Struct): + SIZE = 12 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + fd: Annotated[Annotated[int, ctypes.c_int32], 8] +__s32: TypeAlias = Annotated[int, ctypes.c_int32] +@c.record +class struct_drm_syncobj_create(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_syncobj_destroy(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_syncobj_handle(c.Struct): + SIZE = 16 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + fd: Annotated[Annotated[int, ctypes.c_int32], 8] + pad: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_syncobj_transfer(c.Struct): + SIZE = 32 + src_handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + dst_handle: Annotated[Annotated[int, ctypes.c_uint32], 4] + src_point: Annotated[Annotated[int, ctypes.c_uint64], 8] + dst_point: Annotated[Annotated[int, ctypes.c_uint64], 16] + flags: Annotated[Annotated[int, ctypes.c_uint32], 24] + pad: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class struct_drm_syncobj_wait(c.Struct): + SIZE = 40 + handles: Annotated[Annotated[int, ctypes.c_uint64], 0] + timeout_nsec: Annotated[Annotated[int, ctypes.c_int64], 8] + count_handles: Annotated[Annotated[int, ctypes.c_uint32], 16] + flags: Annotated[Annotated[int, ctypes.c_uint32], 20] + first_signaled: Annotated[Annotated[int, ctypes.c_uint32], 24] + pad: Annotated[Annotated[int, ctypes.c_uint32], 28] + deadline_nsec: Annotated[Annotated[int, ctypes.c_uint64], 32] +__s64: TypeAlias = Annotated[int, ctypes.c_int64] +@c.record +class struct_drm_syncobj_timeline_wait(c.Struct): + SIZE = 48 + handles: Annotated[Annotated[int, ctypes.c_uint64], 0] + points: Annotated[Annotated[int, ctypes.c_uint64], 8] + timeout_nsec: Annotated[Annotated[int, ctypes.c_int64], 16] + count_handles: Annotated[Annotated[int, ctypes.c_uint32], 24] + flags: Annotated[Annotated[int, ctypes.c_uint32], 28] + first_signaled: Annotated[Annotated[int, ctypes.c_uint32], 32] + pad: Annotated[Annotated[int, ctypes.c_uint32], 36] + deadline_nsec: Annotated[Annotated[int, ctypes.c_uint64], 40] +@c.record +class struct_drm_syncobj_eventfd(c.Struct): + SIZE = 24 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + point: Annotated[Annotated[int, ctypes.c_uint64], 8] + fd: Annotated[Annotated[int, ctypes.c_int32], 16] + pad: Annotated[Annotated[int, ctypes.c_uint32], 20] +@c.record +class struct_drm_syncobj_array(c.Struct): + SIZE = 16 + handles: Annotated[Annotated[int, ctypes.c_uint64], 0] + count_handles: Annotated[Annotated[int, ctypes.c_uint32], 8] + pad: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_syncobj_timeline_array(c.Struct): + SIZE = 24 + handles: Annotated[Annotated[int, ctypes.c_uint64], 0] + points: Annotated[Annotated[int, ctypes.c_uint64], 8] + count_handles: Annotated[Annotated[int, ctypes.c_uint32], 16] + flags: Annotated[Annotated[int, ctypes.c_uint32], 20] +@c.record +class struct_drm_crtc_get_sequence(c.Struct): + SIZE = 24 + crtc_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + active: Annotated[Annotated[int, ctypes.c_uint32], 4] + sequence: Annotated[Annotated[int, ctypes.c_uint64], 8] + sequence_ns: Annotated[Annotated[int, ctypes.c_int64], 16] +@c.record +class struct_drm_crtc_queue_sequence(c.Struct): + SIZE = 24 + crtc_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + sequence: Annotated[Annotated[int, ctypes.c_uint64], 8] + user_data: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_event(c.Struct): + SIZE = 8 + type: Annotated[Annotated[int, ctypes.c_uint32], 0] + length: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_event_vblank(c.Struct): + SIZE = 32 + base: Annotated[struct_drm_event, 0] + user_data: Annotated[Annotated[int, ctypes.c_uint64], 8] + tv_sec: Annotated[Annotated[int, ctypes.c_uint32], 16] + tv_usec: Annotated[Annotated[int, ctypes.c_uint32], 20] + sequence: Annotated[Annotated[int, ctypes.c_uint32], 24] + crtc_id: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class struct_drm_event_crtc_sequence(c.Struct): + SIZE = 32 + base: Annotated[struct_drm_event, 0] + user_data: Annotated[Annotated[int, ctypes.c_uint64], 8] + time_ns: Annotated[Annotated[int, ctypes.c_int64], 16] + sequence: Annotated[Annotated[int, ctypes.c_uint64], 24] +drm_clip_rect_t: TypeAlias = struct_drm_clip_rect +drm_drawable_info_t: TypeAlias = struct_drm_drawable_info +drm_tex_region_t: TypeAlias = struct_drm_tex_region +drm_hw_lock_t: TypeAlias = struct_drm_hw_lock +drm_version_t: TypeAlias = struct_drm_version +drm_unique_t: TypeAlias = struct_drm_unique +drm_list_t: TypeAlias = struct_drm_list +drm_block_t: TypeAlias = struct_drm_block +drm_control_t: TypeAlias = struct_drm_control +drm_map_type_t: TypeAlias = enum_drm_map_type +drm_map_flags_t: TypeAlias = enum_drm_map_flags +drm_ctx_priv_map_t: TypeAlias = struct_drm_ctx_priv_map +drm_map_t: TypeAlias = struct_drm_map +drm_client_t: TypeAlias = struct_drm_client +drm_stat_type_t: TypeAlias = enum_drm_stat_type +drm_stats_t: TypeAlias = struct_drm_stats +drm_lock_flags_t: TypeAlias = enum_drm_lock_flags +drm_lock_t: TypeAlias = struct_drm_lock +drm_dma_flags_t: TypeAlias = enum_drm_dma_flags +drm_buf_desc_t: TypeAlias = struct_drm_buf_desc +drm_buf_info_t: TypeAlias = struct_drm_buf_info +drm_buf_free_t: TypeAlias = struct_drm_buf_free +drm_buf_pub_t: TypeAlias = struct_drm_buf_pub +drm_buf_map_t: TypeAlias = struct_drm_buf_map +drm_dma_t: TypeAlias = struct_drm_dma +drm_wait_vblank_t: TypeAlias = union_drm_wait_vblank +drm_agp_mode_t: TypeAlias = struct_drm_agp_mode +drm_ctx_flags_t: TypeAlias = enum_drm_ctx_flags +drm_ctx_t: TypeAlias = struct_drm_ctx +drm_ctx_res_t: TypeAlias = struct_drm_ctx_res +drm_draw_t: TypeAlias = struct_drm_draw +drm_update_draw_t: TypeAlias = struct_drm_update_draw +drm_auth_t: TypeAlias = struct_drm_auth +drm_irq_busid_t: TypeAlias = struct_drm_irq_busid +drm_vblank_seq_type_t: TypeAlias = enum_drm_vblank_seq_type +drm_agp_buffer_t: TypeAlias = struct_drm_agp_buffer +drm_agp_binding_t: TypeAlias = struct_drm_agp_binding +drm_agp_info_t: TypeAlias = struct_drm_agp_info +drm_scatter_gather_t: TypeAlias = struct_drm_scatter_gather +drm_set_version_t: TypeAlias = struct_drm_set_version +@c.record +class struct_drm_amdgpu_gem_create_in(c.Struct): + SIZE = 32 + bo_size: Annotated[Annotated[int, ctypes.c_uint64], 0] + alignment: Annotated[Annotated[int, ctypes.c_uint64], 8] + domains: Annotated[Annotated[int, ctypes.c_uint64], 16] + domain_flags: Annotated[Annotated[int, ctypes.c_uint64], 24] +@c.record +class struct_drm_amdgpu_gem_create_out(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_gem_create(c.Struct): + SIZE = 32 + _in: Annotated[struct_drm_amdgpu_gem_create_in, 0] + out: Annotated[struct_drm_amdgpu_gem_create_out, 0] +@c.record +class struct_drm_amdgpu_bo_list_in(c.Struct): + SIZE = 24 + operation: Annotated[Annotated[int, ctypes.c_uint32], 0] + list_handle: Annotated[Annotated[int, ctypes.c_uint32], 4] + bo_number: Annotated[Annotated[int, ctypes.c_uint32], 8] + bo_info_size: Annotated[Annotated[int, ctypes.c_uint32], 12] + bo_info_ptr: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_amdgpu_bo_list_entry(c.Struct): + SIZE = 8 + bo_handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + bo_priority: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_bo_list_out(c.Struct): + SIZE = 8 + list_handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_bo_list(c.Struct): + SIZE = 24 + _in: Annotated[struct_drm_amdgpu_bo_list_in, 0] + out: Annotated[struct_drm_amdgpu_bo_list_out, 0] +@c.record +class struct_drm_amdgpu_ctx_in(c.Struct): + SIZE = 16 + op: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 8] + priority: Annotated[Annotated[int, ctypes.c_int32], 12] +@c.record +class union_drm_amdgpu_ctx_out(c.Struct): + SIZE = 16 + alloc: Annotated[union_drm_amdgpu_ctx_out_alloc, 0] + state: Annotated[union_drm_amdgpu_ctx_out_state, 0] + pstate: Annotated[union_drm_amdgpu_ctx_out_pstate, 0] +@c.record +class union_drm_amdgpu_ctx_out_alloc(c.Struct): + SIZE = 8 + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_ctx_out_state(c.Struct): + SIZE = 16 + flags: Annotated[Annotated[int, ctypes.c_uint64], 0] + hangs: Annotated[Annotated[int, ctypes.c_uint32], 8] + reset_status: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class union_drm_amdgpu_ctx_out_pstate(c.Struct): + SIZE = 8 + flags: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_ctx(c.Struct): + SIZE = 16 + _in: Annotated[struct_drm_amdgpu_ctx_in, 0] + out: Annotated[union_drm_amdgpu_ctx_out, 0] +@c.record +class struct_drm_amdgpu_userq_in(c.Struct): + SIZE = 72 + op: Annotated[Annotated[int, ctypes.c_uint32], 0] + queue_id: Annotated[Annotated[int, ctypes.c_uint32], 4] + ip_type: Annotated[Annotated[int, ctypes.c_uint32], 8] + doorbell_handle: Annotated[Annotated[int, ctypes.c_uint32], 12] + doorbell_offset: Annotated[Annotated[int, ctypes.c_uint32], 16] + flags: Annotated[Annotated[int, ctypes.c_uint32], 20] + queue_va: Annotated[Annotated[int, ctypes.c_uint64], 24] + queue_size: Annotated[Annotated[int, ctypes.c_uint64], 32] + rptr_va: Annotated[Annotated[int, ctypes.c_uint64], 40] + wptr_va: Annotated[Annotated[int, ctypes.c_uint64], 48] + mqd: Annotated[Annotated[int, ctypes.c_uint64], 56] + mqd_size: Annotated[Annotated[int, ctypes.c_uint64], 64] +@c.record +class struct_drm_amdgpu_userq_out(c.Struct): + SIZE = 8 + queue_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_userq(c.Struct): + SIZE = 72 + _in: Annotated[struct_drm_amdgpu_userq_in, 0] + out: Annotated[struct_drm_amdgpu_userq_out, 0] +@c.record +class struct_drm_amdgpu_userq_mqd_gfx11(c.Struct): + SIZE = 16 + shadow_va: Annotated[Annotated[int, ctypes.c_uint64], 0] + csa_va: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_amdgpu_userq_mqd_sdma_gfx11(c.Struct): + SIZE = 8 + csa_va: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class struct_drm_amdgpu_userq_mqd_compute_gfx11(c.Struct): + SIZE = 8 + eop_va: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class struct_drm_amdgpu_userq_signal(c.Struct): + SIZE = 48 + queue_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + pad: Annotated[Annotated[int, ctypes.c_uint32], 4] + syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 8] + num_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 16] + bo_read_handles: Annotated[Annotated[int, ctypes.c_uint64], 24] + bo_write_handles: Annotated[Annotated[int, ctypes.c_uint64], 32] + num_bo_read_handles: Annotated[Annotated[int, ctypes.c_uint32], 40] + num_bo_write_handles: Annotated[Annotated[int, ctypes.c_uint32], 44] +@c.record +class struct_drm_amdgpu_userq_fence_info(c.Struct): + SIZE = 16 + va: Annotated[Annotated[int, ctypes.c_uint64], 0] + value: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_amdgpu_userq_wait(c.Struct): + SIZE = 72 + waitq_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + pad: Annotated[Annotated[int, ctypes.c_uint32], 4] + syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 8] + syncobj_timeline_handles: Annotated[Annotated[int, ctypes.c_uint64], 16] + syncobj_timeline_points: Annotated[Annotated[int, ctypes.c_uint64], 24] + bo_read_handles: Annotated[Annotated[int, ctypes.c_uint64], 32] + bo_write_handles: Annotated[Annotated[int, ctypes.c_uint64], 40] + num_syncobj_timeline_handles: Annotated[Annotated[int, ctypes.c_uint16], 48] + num_fences: Annotated[Annotated[int, ctypes.c_uint16], 50] + num_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint32], 52] + num_bo_read_handles: Annotated[Annotated[int, ctypes.c_uint32], 56] + num_bo_write_handles: Annotated[Annotated[int, ctypes.c_uint32], 60] + out_fences: Annotated[Annotated[int, ctypes.c_uint64], 64] +__u16: TypeAlias = Annotated[int, ctypes.c_uint16] +class struct_drm_amdgpu_sem_in(ctypes.Structure): pass +class union_drm_amdgpu_sem_out(ctypes.Union): pass +class union_drm_amdgpu_sem(ctypes.Union): pass +@c.record +class struct_drm_amdgpu_vm_in(c.Struct): + SIZE = 8 + op: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_vm_out(c.Struct): + SIZE = 8 + flags: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class union_drm_amdgpu_vm(c.Struct): + SIZE = 8 + _in: Annotated[struct_drm_amdgpu_vm_in, 0] + out: Annotated[struct_drm_amdgpu_vm_out, 0] +@c.record +class struct_drm_amdgpu_sched_in(c.Struct): + SIZE = 16 + op: Annotated[Annotated[int, ctypes.c_uint32], 0] + fd: Annotated[Annotated[int, ctypes.c_uint32], 4] + priority: Annotated[Annotated[int, ctypes.c_int32], 8] + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class union_drm_amdgpu_sched(c.Struct): + SIZE = 16 + _in: Annotated[struct_drm_amdgpu_sched_in, 0] +@c.record +class struct_drm_amdgpu_gem_userptr(c.Struct): + SIZE = 24 + addr: Annotated[Annotated[int, ctypes.c_uint64], 0] + size: Annotated[Annotated[int, ctypes.c_uint64], 8] + flags: Annotated[Annotated[int, ctypes.c_uint32], 16] + handle: Annotated[Annotated[int, ctypes.c_uint32], 20] +@c.record +class struct_drm_amdgpu_gem_dgma(c.Struct): + SIZE = 24 + addr: Annotated[Annotated[int, ctypes.c_uint64], 0] + size: Annotated[Annotated[int, ctypes.c_uint64], 8] + op: Annotated[Annotated[int, ctypes.c_uint32], 16] + handle: Annotated[Annotated[int, ctypes.c_uint32], 20] +@c.record +class struct_drm_amdgpu_gem_metadata(c.Struct): + SIZE = 288 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + op: Annotated[Annotated[int, ctypes.c_uint32], 4] + data: Annotated[struct_drm_amdgpu_gem_metadata_data, 8] +@c.record +class struct_drm_amdgpu_gem_metadata_data(c.Struct): + SIZE = 280 + flags: Annotated[Annotated[int, ctypes.c_uint64], 0] + tiling_info: Annotated[Annotated[int, ctypes.c_uint64], 8] + data_size_bytes: Annotated[Annotated[int, ctypes.c_uint32], 16] + data: Annotated[c.Array[Annotated[int, ctypes.c_uint32], Literal[64]], 20] +@c.record +class struct_drm_amdgpu_gem_mmap_in(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_gem_mmap_out(c.Struct): + SIZE = 8 + addr_ptr: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class union_drm_amdgpu_gem_mmap(c.Struct): + SIZE = 8 + _in: Annotated[struct_drm_amdgpu_gem_mmap_in, 0] + out: Annotated[struct_drm_amdgpu_gem_mmap_out, 0] +@c.record +class struct_drm_amdgpu_gem_wait_idle_in(c.Struct): + SIZE = 16 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + timeout: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_amdgpu_gem_wait_idle_out(c.Struct): + SIZE = 8 + status: Annotated[Annotated[int, ctypes.c_uint32], 0] + domain: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_gem_wait_idle(c.Struct): + SIZE = 16 + _in: Annotated[struct_drm_amdgpu_gem_wait_idle_in, 0] + out: Annotated[struct_drm_amdgpu_gem_wait_idle_out, 0] +@c.record +class struct_drm_amdgpu_wait_cs_in(c.Struct): + SIZE = 32 + handle: Annotated[Annotated[int, ctypes.c_uint64], 0] + timeout: Annotated[Annotated[int, ctypes.c_uint64], 8] + ip_type: Annotated[Annotated[int, ctypes.c_uint32], 16] + ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 20] + ring: Annotated[Annotated[int, ctypes.c_uint32], 24] + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class struct_drm_amdgpu_wait_cs_out(c.Struct): + SIZE = 8 + status: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class union_drm_amdgpu_wait_cs(c.Struct): + SIZE = 32 + _in: Annotated[struct_drm_amdgpu_wait_cs_in, 0] + out: Annotated[struct_drm_amdgpu_wait_cs_out, 0] +@c.record +class struct_drm_amdgpu_fence(c.Struct): + SIZE = 24 + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + ip_type: Annotated[Annotated[int, ctypes.c_uint32], 4] + ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 8] + ring: Annotated[Annotated[int, ctypes.c_uint32], 12] + seq_no: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_amdgpu_wait_fences_in(c.Struct): + SIZE = 24 + fences: Annotated[Annotated[int, ctypes.c_uint64], 0] + fence_count: Annotated[Annotated[int, ctypes.c_uint32], 8] + wait_all: Annotated[Annotated[int, ctypes.c_uint32], 12] + timeout_ns: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_amdgpu_wait_fences_out(c.Struct): + SIZE = 8 + status: Annotated[Annotated[int, ctypes.c_uint32], 0] + first_signaled: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class union_drm_amdgpu_wait_fences(c.Struct): + SIZE = 24 + _in: Annotated[struct_drm_amdgpu_wait_fences_in, 0] + out: Annotated[struct_drm_amdgpu_wait_fences_out, 0] +@c.record +class struct_drm_amdgpu_gem_op(c.Struct): + SIZE = 16 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + op: Annotated[Annotated[int, ctypes.c_uint32], 4] + value: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_amdgpu_gem_va(c.Struct): + SIZE = 64 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 4] + operation: Annotated[Annotated[int, ctypes.c_uint32], 8] + flags: Annotated[Annotated[int, ctypes.c_uint32], 12] + va_address: Annotated[Annotated[int, ctypes.c_uint64], 16] + offset_in_bo: Annotated[Annotated[int, ctypes.c_uint64], 24] + map_size: Annotated[Annotated[int, ctypes.c_uint64], 32] + vm_timeline_point: Annotated[Annotated[int, ctypes.c_uint64], 40] + vm_timeline_syncobj_out: Annotated[Annotated[int, ctypes.c_uint32], 48] + num_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint32], 52] + input_fence_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 56] +@c.record +class struct_drm_amdgpu_cs_chunk(c.Struct): + SIZE = 16 + chunk_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + length_dw: Annotated[Annotated[int, ctypes.c_uint32], 4] + chunk_data: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class struct_drm_amdgpu_cs_in(c.Struct): + SIZE = 24 + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + bo_list_handle: Annotated[Annotated[int, ctypes.c_uint32], 4] + num_chunks: Annotated[Annotated[int, ctypes.c_uint32], 8] + flags: Annotated[Annotated[int, ctypes.c_uint32], 12] + chunks: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_amdgpu_cs_out(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint64], 0] +@c.record +class union_drm_amdgpu_cs(c.Struct): + SIZE = 24 + _in: Annotated[struct_drm_amdgpu_cs_in, 0] + out: Annotated[struct_drm_amdgpu_cs_out, 0] +@c.record +class struct_drm_amdgpu_cs_chunk_ib(c.Struct): + SIZE = 32 + _pad: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + va_start: Annotated[Annotated[int, ctypes.c_uint64], 8] + ib_bytes: Annotated[Annotated[int, ctypes.c_uint32], 16] + ip_type: Annotated[Annotated[int, ctypes.c_uint32], 20] + ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 24] + ring: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class struct_drm_amdgpu_cs_chunk_dep(c.Struct): + SIZE = 24 + ip_type: Annotated[Annotated[int, ctypes.c_uint32], 0] + ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 4] + ring: Annotated[Annotated[int, ctypes.c_uint32], 8] + ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 12] + handle: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_amdgpu_cs_chunk_fence(c.Struct): + SIZE = 8 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + offset: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_cs_chunk_sem(c.Struct): + SIZE = 4 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] +@c.record +class struct_drm_amdgpu_cs_chunk_syncobj(c.Struct): + SIZE = 16 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] + flags: Annotated[Annotated[int, ctypes.c_uint32], 4] + point: Annotated[Annotated[int, ctypes.c_uint64], 8] +@c.record +class union_drm_amdgpu_fence_to_handle(c.Struct): + SIZE = 32 + _in: Annotated[union_drm_amdgpu_fence_to_handle_in, 0] + out: Annotated[union_drm_amdgpu_fence_to_handle_out, 0] +@c.record +class union_drm_amdgpu_fence_to_handle_in(c.Struct): + SIZE = 32 + fence: Annotated[struct_drm_amdgpu_fence, 0] + what: Annotated[Annotated[int, ctypes.c_uint32], 24] + pad: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class union_drm_amdgpu_fence_to_handle_out(c.Struct): + SIZE = 4 + handle: Annotated[Annotated[int, ctypes.c_uint32], 0] +@c.record +class struct_drm_amdgpu_cs_chunk_data(c.Struct): + SIZE = 32 + ib_data: Annotated[struct_drm_amdgpu_cs_chunk_ib, 0] + fence_data: Annotated[struct_drm_amdgpu_cs_chunk_fence, 0] +@c.record +class struct_drm_amdgpu_cs_chunk_cp_gfx_shadow(c.Struct): + SIZE = 32 + shadow_va: Annotated[Annotated[int, ctypes.c_uint64], 0] + csa_va: Annotated[Annotated[int, ctypes.c_uint64], 8] + gds_va: Annotated[Annotated[int, ctypes.c_uint64], 16] + flags: Annotated[Annotated[int, ctypes.c_uint64], 24] +@c.record +class struct_drm_amdgpu_query_fw(c.Struct): + SIZE = 16 + fw_type: Annotated[Annotated[int, ctypes.c_uint32], 0] + ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 4] + index: Annotated[Annotated[int, ctypes.c_uint32], 8] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_amdgpu_info(c.Struct): + SIZE = 16 + return_pointer: Annotated[Annotated[int, ctypes.c_uint64], 0] + return_size: Annotated[Annotated[int, ctypes.c_uint32], 8] + query: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_amdgpu_info_gds(c.Struct): + SIZE = 32 + gds_gfx_partition_size: Annotated[Annotated[int, ctypes.c_uint32], 0] + compute_partition_size: Annotated[Annotated[int, ctypes.c_uint32], 4] + gds_total_size: Annotated[Annotated[int, ctypes.c_uint32], 8] + gws_per_gfx_partition: Annotated[Annotated[int, ctypes.c_uint32], 12] + gws_per_compute_partition: Annotated[Annotated[int, ctypes.c_uint32], 16] + oa_per_gfx_partition: Annotated[Annotated[int, ctypes.c_uint32], 20] + oa_per_compute_partition: Annotated[Annotated[int, ctypes.c_uint32], 24] + _pad: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class struct_drm_amdgpu_info_vram_gtt(c.Struct): + SIZE = 24 + vram_size: Annotated[Annotated[int, ctypes.c_uint64], 0] + vram_cpu_accessible_size: Annotated[Annotated[int, ctypes.c_uint64], 8] + gtt_size: Annotated[Annotated[int, ctypes.c_uint64], 16] +@c.record +class struct_drm_amdgpu_heap_info(c.Struct): + SIZE = 32 + total_heap_size: Annotated[Annotated[int, ctypes.c_uint64], 0] + usable_heap_size: Annotated[Annotated[int, ctypes.c_uint64], 8] + heap_usage: Annotated[Annotated[int, ctypes.c_uint64], 16] + max_allocation: Annotated[Annotated[int, ctypes.c_uint64], 24] +@c.record +class struct_drm_amdgpu_memory_info(c.Struct): + SIZE = 96 + vram: Annotated[struct_drm_amdgpu_heap_info, 0] + cpu_accessible_vram: Annotated[struct_drm_amdgpu_heap_info, 32] + gtt: Annotated[struct_drm_amdgpu_heap_info, 64] +@c.record +class struct_drm_amdgpu_info_firmware(c.Struct): + SIZE = 8 + ver: Annotated[Annotated[int, ctypes.c_uint32], 0] + feature: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_info_vbios(c.Struct): + SIZE = 200 + name: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[64]], 0] + vbios_pn: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[64]], 64] + version: Annotated[Annotated[int, ctypes.c_uint32], 128] + pad: Annotated[Annotated[int, ctypes.c_uint32], 132] + vbios_ver_str: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[32]], 136] + date: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[32]], 168] +__u8: TypeAlias = Annotated[int, ctypes.c_ubyte] +@c.record +class struct_drm_amdgpu_info_device(c.Struct): + SIZE = 448 + device_id: Annotated[Annotated[int, ctypes.c_uint32], 0] + chip_rev: Annotated[Annotated[int, ctypes.c_uint32], 4] + external_rev: Annotated[Annotated[int, ctypes.c_uint32], 8] + pci_rev: Annotated[Annotated[int, ctypes.c_uint32], 12] + family: Annotated[Annotated[int, ctypes.c_uint32], 16] + num_shader_engines: Annotated[Annotated[int, ctypes.c_uint32], 20] + num_shader_arrays_per_engine: Annotated[Annotated[int, ctypes.c_uint32], 24] + gpu_counter_freq: Annotated[Annotated[int, ctypes.c_uint32], 28] + max_engine_clock: Annotated[Annotated[int, ctypes.c_uint64], 32] + max_memory_clock: Annotated[Annotated[int, ctypes.c_uint64], 40] + cu_active_number: Annotated[Annotated[int, ctypes.c_uint32], 48] + cu_ao_mask: Annotated[Annotated[int, ctypes.c_uint32], 52] + cu_bitmap: Annotated[c.Array[c.Array[Annotated[int, ctypes.c_uint32], Literal[4]], Literal[4]], 56] + enabled_rb_pipes_mask: Annotated[Annotated[int, ctypes.c_uint32], 120] + num_rb_pipes: Annotated[Annotated[int, ctypes.c_uint32], 124] + num_hw_gfx_contexts: Annotated[Annotated[int, ctypes.c_uint32], 128] + pcie_gen: Annotated[Annotated[int, ctypes.c_uint32], 132] + ids_flags: Annotated[Annotated[int, ctypes.c_uint64], 136] + virtual_address_offset: Annotated[Annotated[int, ctypes.c_uint64], 144] + virtual_address_max: Annotated[Annotated[int, ctypes.c_uint64], 152] + virtual_address_alignment: Annotated[Annotated[int, ctypes.c_uint32], 160] + pte_fragment_size: Annotated[Annotated[int, ctypes.c_uint32], 164] + gart_page_size: Annotated[Annotated[int, ctypes.c_uint32], 168] + ce_ram_size: Annotated[Annotated[int, ctypes.c_uint32], 172] + vram_type: Annotated[Annotated[int, ctypes.c_uint32], 176] + vram_bit_width: Annotated[Annotated[int, ctypes.c_uint32], 180] + vce_harvest_config: Annotated[Annotated[int, ctypes.c_uint32], 184] + gc_double_offchip_lds_buf: Annotated[Annotated[int, ctypes.c_uint32], 188] + prim_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 192] + pos_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 200] + cntl_sb_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 208] + param_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 216] + prim_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 224] + pos_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 228] + cntl_sb_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 232] + param_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 236] + wave_front_size: Annotated[Annotated[int, ctypes.c_uint32], 240] + num_shader_visible_vgprs: Annotated[Annotated[int, ctypes.c_uint32], 244] + num_cu_per_sh: Annotated[Annotated[int, ctypes.c_uint32], 248] + num_tcc_blocks: Annotated[Annotated[int, ctypes.c_uint32], 252] + gs_vgt_table_depth: Annotated[Annotated[int, ctypes.c_uint32], 256] + gs_prim_buffer_depth: Annotated[Annotated[int, ctypes.c_uint32], 260] + max_gs_waves_per_vgt: Annotated[Annotated[int, ctypes.c_uint32], 264] + pcie_num_lanes: Annotated[Annotated[int, ctypes.c_uint32], 268] + cu_ao_bitmap: Annotated[c.Array[c.Array[Annotated[int, ctypes.c_uint32], Literal[4]], Literal[4]], 272] + high_va_offset: Annotated[Annotated[int, ctypes.c_uint64], 336] + high_va_max: Annotated[Annotated[int, ctypes.c_uint64], 344] + pa_sc_tile_steering_override: Annotated[Annotated[int, ctypes.c_uint32], 352] + tcc_disabled_mask: Annotated[Annotated[int, ctypes.c_uint64], 360] + min_engine_clock: Annotated[Annotated[int, ctypes.c_uint64], 368] + min_memory_clock: Annotated[Annotated[int, ctypes.c_uint64], 376] + tcp_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 384] + num_sqc_per_wgp: Annotated[Annotated[int, ctypes.c_uint32], 388] + sqc_data_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 392] + sqc_inst_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 396] + gl1c_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 400] + gl2c_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 404] + mall_size: Annotated[Annotated[int, ctypes.c_uint64], 408] + enabled_rb_pipes_mask_hi: Annotated[Annotated[int, ctypes.c_uint32], 416] + shadow_size: Annotated[Annotated[int, ctypes.c_uint32], 420] + shadow_alignment: Annotated[Annotated[int, ctypes.c_uint32], 424] + csa_size: Annotated[Annotated[int, ctypes.c_uint32], 428] + csa_alignment: Annotated[Annotated[int, ctypes.c_uint32], 432] + userq_ip_mask: Annotated[Annotated[int, ctypes.c_uint32], 436] + pad: Annotated[Annotated[int, ctypes.c_uint32], 440] +@c.record +class struct_drm_amdgpu_info_hw_ip(c.Struct): + SIZE = 32 + hw_ip_version_major: Annotated[Annotated[int, ctypes.c_uint32], 0] + hw_ip_version_minor: Annotated[Annotated[int, ctypes.c_uint32], 4] + capabilities_flags: Annotated[Annotated[int, ctypes.c_uint64], 8] + ib_start_alignment: Annotated[Annotated[int, ctypes.c_uint32], 16] + ib_size_alignment: Annotated[Annotated[int, ctypes.c_uint32], 20] + available_rings: Annotated[Annotated[int, ctypes.c_uint32], 24] + ip_discovery_version: Annotated[Annotated[int, ctypes.c_uint32], 28] +@c.record +class struct_drm_amdgpu_info_uq_fw_areas_gfx(c.Struct): + SIZE = 16 + shadow_size: Annotated[Annotated[int, ctypes.c_uint32], 0] + shadow_alignment: Annotated[Annotated[int, ctypes.c_uint32], 4] + csa_size: Annotated[Annotated[int, ctypes.c_uint32], 8] + csa_alignment: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_amdgpu_info_uq_fw_areas(c.Struct): + SIZE = 16 + gfx: Annotated[struct_drm_amdgpu_info_uq_fw_areas_gfx, 0] +@c.record +class struct_drm_amdgpu_info_num_handles(c.Struct): + SIZE = 8 + uvd_max_handles: Annotated[Annotated[int, ctypes.c_uint32], 0] + uvd_used_handles: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_info_vce_clock_table_entry(c.Struct): + SIZE = 16 + sclk: Annotated[Annotated[int, ctypes.c_uint32], 0] + mclk: Annotated[Annotated[int, ctypes.c_uint32], 4] + eclk: Annotated[Annotated[int, ctypes.c_uint32], 8] + pad: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_amdgpu_info_vce_clock_table(c.Struct): + SIZE = 104 + entries: Annotated[c.Array[struct_drm_amdgpu_info_vce_clock_table_entry, Literal[6]], 0] + num_valid_entries: Annotated[Annotated[int, ctypes.c_uint32], 96] + pad: Annotated[Annotated[int, ctypes.c_uint32], 100] +@c.record +class struct_drm_amdgpu_info_video_codec_info(c.Struct): + SIZE = 24 + valid: Annotated[Annotated[int, ctypes.c_uint32], 0] + max_width: Annotated[Annotated[int, ctypes.c_uint32], 4] + max_height: Annotated[Annotated[int, ctypes.c_uint32], 8] + max_pixels_per_frame: Annotated[Annotated[int, ctypes.c_uint32], 12] + max_level: Annotated[Annotated[int, ctypes.c_uint32], 16] + pad: Annotated[Annotated[int, ctypes.c_uint32], 20] +@c.record +class struct_drm_amdgpu_info_video_caps(c.Struct): + SIZE = 192 + codec_info: Annotated[c.Array[struct_drm_amdgpu_info_video_codec_info, Literal[8]], 0] +@c.record +class struct_drm_amdgpu_info_gpuvm_fault(c.Struct): + SIZE = 16 + addr: Annotated[Annotated[int, ctypes.c_uint64], 0] + status: Annotated[Annotated[int, ctypes.c_uint32], 8] + vmhub: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_amdgpu_info_uq_metadata_gfx(c.Struct): + SIZE = 16 + shadow_size: Annotated[Annotated[int, ctypes.c_uint32], 0] + shadow_alignment: Annotated[Annotated[int, ctypes.c_uint32], 4] + csa_size: Annotated[Annotated[int, ctypes.c_uint32], 8] + csa_alignment: Annotated[Annotated[int, ctypes.c_uint32], 12] +@c.record +class struct_drm_amdgpu_info_uq_metadata(c.Struct): + SIZE = 16 + gfx: Annotated[struct_drm_amdgpu_info_uq_metadata_gfx, 0] +class _anonstruct0(ctypes.Structure): pass +class struct_drm_amdgpu_virtual_range(ctypes.Structure): pass +@c.record +class struct_drm_amdgpu_capability(c.Struct): + SIZE = 8 + flag: Annotated[Annotated[int, ctypes.c_uint32], 0] + direct_gma_size: Annotated[Annotated[int, ctypes.c_uint32], 4] +@c.record +class struct_drm_amdgpu_freesync(c.Struct): + SIZE = 32 + op: Annotated[Annotated[int, ctypes.c_uint32], 0] + spare: Annotated[c.Array[Annotated[int, ctypes.c_uint32], Literal[7]], 4] +c.init_records() +DRM_NAME = "drm" # type: ignore +DRM_MIN_ORDER = 5 # type: ignore +DRM_MAX_ORDER = 22 # type: ignore +DRM_RAM_PERCENT = 10 # type: ignore +_DRM_LOCK_HELD = 0x80000000 # type: ignore +_DRM_LOCK_CONT = 0x40000000 # type: ignore +_DRM_LOCK_IS_HELD = lambda lock: ((lock) & _DRM_LOCK_HELD) # type: ignore +_DRM_LOCK_IS_CONT = lambda lock: ((lock) & _DRM_LOCK_CONT) # type: ignore +_DRM_LOCKING_CONTEXT = lambda lock: ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) # type: ignore +_DRM_VBLANK_HIGH_CRTC_SHIFT = 1 # type: ignore +_DRM_VBLANK_TYPES_MASK = (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) # type: ignore +_DRM_VBLANK_FLAGS_MASK = (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) # type: ignore +_DRM_PRE_MODESET = 1 # type: ignore +_DRM_POST_MODESET = 2 # type: ignore +DRM_CAP_DUMB_BUFFER = 0x1 # type: ignore +DRM_CAP_VBLANK_HIGH_CRTC = 0x2 # type: ignore +DRM_CAP_DUMB_PREFERRED_DEPTH = 0x3 # type: ignore +DRM_CAP_DUMB_PREFER_SHADOW = 0x4 # type: ignore +DRM_CAP_PRIME = 0x5 # type: ignore +DRM_PRIME_CAP_IMPORT = 0x1 # type: ignore +DRM_PRIME_CAP_EXPORT = 0x2 # type: ignore +DRM_CAP_TIMESTAMP_MONOTONIC = 0x6 # type: ignore +DRM_CAP_ASYNC_PAGE_FLIP = 0x7 # type: ignore +DRM_CAP_CURSOR_WIDTH = 0x8 # type: ignore +DRM_CAP_CURSOR_HEIGHT = 0x9 # type: ignore +DRM_CAP_ADDFB2_MODIFIERS = 0x10 # type: ignore +DRM_CAP_PAGE_FLIP_TARGET = 0x11 # type: ignore +DRM_CAP_CRTC_IN_VBLANK_EVENT = 0x12 # type: ignore +DRM_CAP_SYNCOBJ = 0x13 # type: ignore +DRM_CAP_SYNCOBJ_TIMELINE = 0x14 # type: ignore +DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP = 0x15 # type: ignore +DRM_CLIENT_CAP_STEREO_3D = 1 # type: ignore +DRM_CLIENT_CAP_UNIVERSAL_PLANES = 2 # type: ignore +DRM_CLIENT_CAP_ATOMIC = 3 # type: ignore +DRM_CLIENT_CAP_ASPECT_RATIO = 4 # type: ignore +DRM_CLIENT_CAP_WRITEBACK_CONNECTORS = 5 # type: ignore +DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT = 6 # type: ignore +DRM_SYNCOBJ_CREATE_SIGNALED = (1 << 0) # type: ignore +DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE = (1 << 0) # type: ignore +DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE = (1 << 0) # type: ignore +DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL = (1 << 0) # type: ignore +DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT = (1 << 1) # type: ignore +DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE = (1 << 2) # type: ignore +DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE = (1 << 3) # type: ignore +DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED = (1 << 0) # type: ignore +DRM_CRTC_SEQUENCE_RELATIVE = 0x00000001 # type: ignore +DRM_CRTC_SEQUENCE_NEXT_ON_MISS = 0x00000002 # type: ignore +DRM_IOCTL_BASE = 'd' # type: ignore +DRM_IO = lambda nr: _IO(DRM_IOCTL_BASE,nr) # type: ignore +DRM_IOR = lambda nr,type: _IOR(DRM_IOCTL_BASE,nr,type) # type: ignore +DRM_IOW = lambda nr,type: _IOW(DRM_IOCTL_BASE,nr,type) # type: ignore +DRM_IOWR = lambda nr,type: _IOWR(DRM_IOCTL_BASE,nr,type) # type: ignore +DRM_IOCTL_VERSION = DRM_IOWR(0x00, struct_drm_version) # type: ignore +DRM_IOCTL_GET_UNIQUE = DRM_IOWR(0x01, struct_drm_unique) # type: ignore +DRM_IOCTL_GET_MAGIC = DRM_IOR( 0x02, struct_drm_auth) # type: ignore +DRM_IOCTL_IRQ_BUSID = DRM_IOWR(0x03, struct_drm_irq_busid) # type: ignore +DRM_IOCTL_GET_MAP = DRM_IOWR(0x04, struct_drm_map) # type: ignore +DRM_IOCTL_GET_CLIENT = DRM_IOWR(0x05, struct_drm_client) # type: ignore +DRM_IOCTL_GET_STATS = DRM_IOR( 0x06, struct_drm_stats) # type: ignore +DRM_IOCTL_SET_VERSION = DRM_IOWR(0x07, struct_drm_set_version) # type: ignore +DRM_IOCTL_MODESET_CTL = DRM_IOW(0x08, struct_drm_modeset_ctl) # type: ignore +DRM_IOCTL_GEM_CLOSE = DRM_IOW (0x09, struct_drm_gem_close) # type: ignore +DRM_IOCTL_GEM_FLINK = DRM_IOWR(0x0a, struct_drm_gem_flink) # type: ignore +DRM_IOCTL_GEM_OPEN = DRM_IOWR(0x0b, struct_drm_gem_open) # type: ignore +DRM_IOCTL_GET_CAP = DRM_IOWR(0x0c, struct_drm_get_cap) # type: ignore +DRM_IOCTL_SET_CLIENT_CAP = DRM_IOW( 0x0d, struct_drm_set_client_cap) # type: ignore +DRM_IOCTL_SET_UNIQUE = DRM_IOW( 0x10, struct_drm_unique) # type: ignore +DRM_IOCTL_AUTH_MAGIC = DRM_IOW( 0x11, struct_drm_auth) # type: ignore +DRM_IOCTL_BLOCK = DRM_IOWR(0x12, struct_drm_block) # type: ignore +DRM_IOCTL_UNBLOCK = DRM_IOWR(0x13, struct_drm_block) # type: ignore +DRM_IOCTL_CONTROL = DRM_IOW( 0x14, struct_drm_control) # type: ignore +DRM_IOCTL_ADD_MAP = DRM_IOWR(0x15, struct_drm_map) # type: ignore +DRM_IOCTL_ADD_BUFS = DRM_IOWR(0x16, struct_drm_buf_desc) # type: ignore +DRM_IOCTL_MARK_BUFS = DRM_IOW( 0x17, struct_drm_buf_desc) # type: ignore +DRM_IOCTL_INFO_BUFS = DRM_IOWR(0x18, struct_drm_buf_info) # type: ignore +DRM_IOCTL_MAP_BUFS = DRM_IOWR(0x19, struct_drm_buf_map) # type: ignore +DRM_IOCTL_FREE_BUFS = DRM_IOW( 0x1a, struct_drm_buf_free) # type: ignore +DRM_IOCTL_RM_MAP = DRM_IOW( 0x1b, struct_drm_map) # type: ignore +DRM_IOCTL_SET_SAREA_CTX = DRM_IOW( 0x1c, struct_drm_ctx_priv_map) # type: ignore +DRM_IOCTL_GET_SAREA_CTX = DRM_IOWR(0x1d, struct_drm_ctx_priv_map) # type: ignore +DRM_IOCTL_SET_MASTER = DRM_IO(0x1e) # type: ignore +DRM_IOCTL_DROP_MASTER = DRM_IO(0x1f) # type: ignore +DRM_IOCTL_ADD_CTX = DRM_IOWR(0x20, struct_drm_ctx) # type: ignore +DRM_IOCTL_RM_CTX = DRM_IOWR(0x21, struct_drm_ctx) # type: ignore +DRM_IOCTL_MOD_CTX = DRM_IOW( 0x22, struct_drm_ctx) # type: ignore +DRM_IOCTL_GET_CTX = DRM_IOWR(0x23, struct_drm_ctx) # type: ignore +DRM_IOCTL_SWITCH_CTX = DRM_IOW( 0x24, struct_drm_ctx) # type: ignore +DRM_IOCTL_NEW_CTX = DRM_IOW( 0x25, struct_drm_ctx) # type: ignore +DRM_IOCTL_RES_CTX = DRM_IOWR(0x26, struct_drm_ctx_res) # type: ignore +DRM_IOCTL_ADD_DRAW = DRM_IOWR(0x27, struct_drm_draw) # type: ignore +DRM_IOCTL_RM_DRAW = DRM_IOWR(0x28, struct_drm_draw) # type: ignore +DRM_IOCTL_DMA = DRM_IOWR(0x29, struct_drm_dma) # type: ignore +DRM_IOCTL_LOCK = DRM_IOW( 0x2a, struct_drm_lock) # type: ignore +DRM_IOCTL_UNLOCK = DRM_IOW( 0x2b, struct_drm_lock) # type: ignore +DRM_IOCTL_FINISH = DRM_IOW( 0x2c, struct_drm_lock) # type: ignore +DRM_IOCTL_PRIME_HANDLE_TO_FD = DRM_IOWR(0x2d, struct_drm_prime_handle) # type: ignore +DRM_IOCTL_PRIME_FD_TO_HANDLE = DRM_IOWR(0x2e, struct_drm_prime_handle) # type: ignore +DRM_IOCTL_AGP_ACQUIRE = DRM_IO( 0x30) # type: ignore +DRM_IOCTL_AGP_RELEASE = DRM_IO( 0x31) # type: ignore +DRM_IOCTL_AGP_ENABLE = DRM_IOW( 0x32, struct_drm_agp_mode) # type: ignore +DRM_IOCTL_AGP_INFO = DRM_IOR( 0x33, struct_drm_agp_info) # type: ignore +DRM_IOCTL_AGP_ALLOC = DRM_IOWR(0x34, struct_drm_agp_buffer) # type: ignore +DRM_IOCTL_AGP_FREE = DRM_IOW( 0x35, struct_drm_agp_buffer) # type: ignore +DRM_IOCTL_AGP_BIND = DRM_IOW( 0x36, struct_drm_agp_binding) # type: ignore +DRM_IOCTL_AGP_UNBIND = DRM_IOW( 0x37, struct_drm_agp_binding) # type: ignore +DRM_IOCTL_SG_ALLOC = DRM_IOWR(0x38, struct_drm_scatter_gather) # type: ignore +DRM_IOCTL_SG_FREE = DRM_IOW( 0x39, struct_drm_scatter_gather) # type: ignore +DRM_IOCTL_WAIT_VBLANK = DRM_IOWR(0x3a, union_drm_wait_vblank) # type: ignore +DRM_IOCTL_CRTC_GET_SEQUENCE = DRM_IOWR(0x3b, struct_drm_crtc_get_sequence) # type: ignore +DRM_IOCTL_CRTC_QUEUE_SEQUENCE = DRM_IOWR(0x3c, struct_drm_crtc_queue_sequence) # type: ignore +DRM_IOCTL_UPDATE_DRAW = DRM_IOW(0x3f, struct_drm_update_draw) # type: ignore +DRM_IOCTL_SYNCOBJ_CREATE = DRM_IOWR(0xBF, struct_drm_syncobj_create) # type: ignore +DRM_IOCTL_SYNCOBJ_DESTROY = DRM_IOWR(0xC0, struct_drm_syncobj_destroy) # type: ignore +DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD = DRM_IOWR(0xC1, struct_drm_syncobj_handle) # type: ignore +DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE = DRM_IOWR(0xC2, struct_drm_syncobj_handle) # type: ignore +DRM_IOCTL_SYNCOBJ_WAIT = DRM_IOWR(0xC3, struct_drm_syncobj_wait) # type: ignore +DRM_IOCTL_SYNCOBJ_RESET = DRM_IOWR(0xC4, struct_drm_syncobj_array) # type: ignore +DRM_IOCTL_SYNCOBJ_SIGNAL = DRM_IOWR(0xC5, struct_drm_syncobj_array) # type: ignore +DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT = DRM_IOWR(0xCA, struct_drm_syncobj_timeline_wait) # type: ignore +DRM_IOCTL_SYNCOBJ_QUERY = DRM_IOWR(0xCB, struct_drm_syncobj_timeline_array) # type: ignore +DRM_IOCTL_SYNCOBJ_TRANSFER = DRM_IOWR(0xCC, struct_drm_syncobj_transfer) # type: ignore +DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL = DRM_IOWR(0xCD, struct_drm_syncobj_timeline_array) # type: ignore +DRM_IOCTL_SYNCOBJ_EVENTFD = DRM_IOWR(0xCF, struct_drm_syncobj_eventfd) # type: ignore +DRM_COMMAND_BASE = 0x40 # type: ignore +DRM_COMMAND_END = 0xA0 # type: ignore +DRM_EVENT_VBLANK = 0x01 # type: ignore +DRM_EVENT_FLIP_COMPLETE = 0x02 # type: ignore +DRM_EVENT_CRTC_SEQUENCE = 0x03 # type: ignore +DRM_AMDGPU_GEM_CREATE = 0x00 # type: ignore +DRM_AMDGPU_GEM_MMAP = 0x01 # type: ignore +DRM_AMDGPU_CTX = 0x02 # type: ignore +DRM_AMDGPU_BO_LIST = 0x03 # type: ignore +DRM_AMDGPU_CS = 0x04 # type: ignore +DRM_AMDGPU_INFO = 0x05 # type: ignore +DRM_AMDGPU_GEM_METADATA = 0x06 # type: ignore +DRM_AMDGPU_GEM_WAIT_IDLE = 0x07 # type: ignore +DRM_AMDGPU_GEM_VA = 0x08 # type: ignore +DRM_AMDGPU_WAIT_CS = 0x09 # type: ignore +DRM_AMDGPU_GEM_OP = 0x10 # type: ignore +DRM_AMDGPU_GEM_USERPTR = 0x11 # type: ignore +DRM_AMDGPU_WAIT_FENCES = 0x12 # type: ignore +DRM_AMDGPU_VM = 0x13 # type: ignore +DRM_AMDGPU_FENCE_TO_HANDLE = 0x14 # type: ignore +DRM_AMDGPU_SCHED = 0x15 # type: ignore +DRM_AMDGPU_USERQ = 0x16 # type: ignore +DRM_AMDGPU_USERQ_SIGNAL = 0x17 # type: ignore +DRM_AMDGPU_USERQ_WAIT = 0x18 # type: ignore +DRM_AMDGPU_GEM_DGMA = 0x5c # type: ignore +DRM_AMDGPU_SEM = 0x5b # type: ignore +DRM_IOCTL_AMDGPU_GEM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union_drm_amdgpu_gem_create) # type: ignore +DRM_IOCTL_AMDGPU_GEM_MMAP = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union_drm_amdgpu_gem_mmap) # type: ignore +DRM_IOCTL_AMDGPU_CTX = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union_drm_amdgpu_ctx) # type: ignore +DRM_IOCTL_AMDGPU_BO_LIST = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union_drm_amdgpu_bo_list) # type: ignore +DRM_IOCTL_AMDGPU_CS = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union_drm_amdgpu_cs) # type: ignore +DRM_IOCTL_AMDGPU_INFO = DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct_drm_amdgpu_info) # type: ignore +DRM_IOCTL_AMDGPU_GEM_METADATA = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct_drm_amdgpu_gem_metadata) # type: ignore +DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union_drm_amdgpu_gem_wait_idle) # type: ignore +DRM_IOCTL_AMDGPU_GEM_VA = DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct_drm_amdgpu_gem_va) # type: ignore +DRM_IOCTL_AMDGPU_WAIT_CS = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union_drm_amdgpu_wait_cs) # type: ignore +DRM_IOCTL_AMDGPU_GEM_OP = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct_drm_amdgpu_gem_op) # type: ignore +DRM_IOCTL_AMDGPU_GEM_USERPTR = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct_drm_amdgpu_gem_userptr) # type: ignore +DRM_IOCTL_AMDGPU_WAIT_FENCES = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union_drm_amdgpu_wait_fences) # type: ignore +DRM_IOCTL_AMDGPU_VM = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union_drm_amdgpu_vm) # type: ignore +DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union_drm_amdgpu_fence_to_handle) # type: ignore +DRM_IOCTL_AMDGPU_SCHED = DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union_drm_amdgpu_sched) # type: ignore +DRM_IOCTL_AMDGPU_USERQ = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union_drm_amdgpu_userq) # type: ignore +DRM_IOCTL_AMDGPU_USERQ_SIGNAL = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct_drm_amdgpu_userq_signal) # type: ignore +DRM_IOCTL_AMDGPU_USERQ_WAIT = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct_drm_amdgpu_userq_wait) # type: ignore +DRM_IOCTL_AMDGPU_GEM_DGMA = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct_drm_amdgpu_gem_dgma) # type: ignore +DRM_IOCTL_AMDGPU_SEM = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union_drm_amdgpu_sem) # type: ignore +AMDGPU_GEM_DOMAIN_CPU = 0x1 # type: ignore +AMDGPU_GEM_DOMAIN_GTT = 0x2 # type: ignore +AMDGPU_GEM_DOMAIN_VRAM = 0x4 # type: ignore +AMDGPU_GEM_DOMAIN_GDS = 0x8 # type: ignore +AMDGPU_GEM_DOMAIN_GWS = 0x10 # type: ignore +AMDGPU_GEM_DOMAIN_OA = 0x20 # type: ignore +AMDGPU_GEM_DOMAIN_DOORBELL = 0x40 # type: ignore +AMDGPU_GEM_DOMAIN_DGMA = 0x400 # type: ignore +AMDGPU_GEM_DOMAIN_DGMA_IMPORT = 0x800 # type: ignore +AMDGPU_GEM_DOMAIN_MASK = (AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | AMDGPU_GEM_DOMAIN_DOORBELL | AMDGPU_GEM_DOMAIN_DGMA | AMDGPU_GEM_DOMAIN_DGMA_IMPORT) # type: ignore +AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED = (1 << 0) # type: ignore +AMDGPU_GEM_CREATE_NO_CPU_ACCESS = (1 << 1) # type: ignore +AMDGPU_GEM_CREATE_CPU_GTT_USWC = (1 << 2) # type: ignore +AMDGPU_GEM_CREATE_VRAM_CLEARED = (1 << 3) # type: ignore +AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS = (1 << 5) # type: ignore +AMDGPU_GEM_CREATE_VM_ALWAYS_VALID = (1 << 6) # type: ignore +AMDGPU_GEM_CREATE_EXPLICIT_SYNC = (1 << 7) # type: ignore +AMDGPU_GEM_CREATE_CP_MQD_GFX9 = (1 << 8) # type: ignore +AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE = (1 << 9) # type: ignore +AMDGPU_GEM_CREATE_ENCRYPTED = (1 << 10) # type: ignore +AMDGPU_GEM_CREATE_PREEMPTIBLE = (1 << 11) # type: ignore +AMDGPU_GEM_CREATE_DISCARDABLE = (1 << 12) # type: ignore +AMDGPU_GEM_CREATE_COHERENT = (1 << 13) # type: ignore +AMDGPU_GEM_CREATE_UNCACHED = (1 << 14) # type: ignore +AMDGPU_GEM_CREATE_EXT_COHERENT = (1 << 15) # type: ignore +AMDGPU_GEM_CREATE_GFX12_DCC = (1 << 16) # type: ignore +AMDGPU_GEM_CREATE_SPARSE = (1 << 29) # type: ignore +AMDGPU_GEM_CREATE_TOP_DOWN = (1 << 30) # type: ignore +AMDGPU_GEM_CREATE_NO_EVICT = (1 << 31) # type: ignore +AMDGPU_BO_LIST_OP_CREATE = 0 # type: ignore +AMDGPU_BO_LIST_OP_DESTROY = 1 # type: ignore +AMDGPU_BO_LIST_OP_UPDATE = 2 # type: ignore +AMDGPU_CTX_OP_ALLOC_CTX = 1 # type: ignore +AMDGPU_CTX_OP_FREE_CTX = 2 # type: ignore +AMDGPU_CTX_OP_QUERY_STATE = 3 # type: ignore +AMDGPU_CTX_OP_QUERY_STATE2 = 4 # type: ignore +AMDGPU_CTX_OP_GET_STABLE_PSTATE = 5 # type: ignore +AMDGPU_CTX_OP_SET_STABLE_PSTATE = 6 # type: ignore +AMDGPU_CTX_NO_RESET = 0 # type: ignore +AMDGPU_CTX_GUILTY_RESET = 1 # type: ignore +AMDGPU_CTX_INNOCENT_RESET = 2 # type: ignore +AMDGPU_CTX_UNKNOWN_RESET = 3 # type: ignore +AMDGPU_CTX_QUERY2_FLAGS_RESET = (1<<0) # type: ignore +AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST = (1<<1) # type: ignore +AMDGPU_CTX_QUERY2_FLAGS_GUILTY = (1<<2) # type: ignore +AMDGPU_CTX_QUERY2_FLAGS_RAS_CE = (1<<3) # type: ignore +AMDGPU_CTX_QUERY2_FLAGS_RAS_UE = (1<<4) # type: ignore +AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS = (1<<5) # type: ignore +AMDGPU_CTX_PRIORITY_UNSET = -2048 # type: ignore +AMDGPU_CTX_PRIORITY_VERY_LOW = -1023 # type: ignore +AMDGPU_CTX_PRIORITY_LOW = -512 # type: ignore +AMDGPU_CTX_PRIORITY_NORMAL = 0 # type: ignore +AMDGPU_CTX_PRIORITY_HIGH = 512 # type: ignore +AMDGPU_CTX_PRIORITY_VERY_HIGH = 1023 # type: ignore +AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK = 0xf # type: ignore +AMDGPU_CTX_STABLE_PSTATE_NONE = 0 # type: ignore +AMDGPU_CTX_STABLE_PSTATE_STANDARD = 1 # type: ignore +AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK = 2 # type: ignore +AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK = 3 # type: ignore +AMDGPU_CTX_STABLE_PSTATE_PEAK = 4 # type: ignore +AMDGPU_USERQ_OP_CREATE = 1 # type: ignore +AMDGPU_USERQ_OP_FREE = 2 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK = 0x3 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT = 0 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW = 0 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW = 1 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH = 2 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH = 3 # type: ignore +AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE = (1 << 2) # type: ignore +AMDGPU_SEM_OP_CREATE_SEM = 1 # type: ignore +AMDGPU_SEM_OP_WAIT_SEM = 2 # type: ignore +AMDGPU_SEM_OP_SIGNAL_SEM = 3 # type: ignore +AMDGPU_SEM_OP_DESTROY_SEM = 4 # type: ignore +AMDGPU_SEM_OP_IMPORT_SEM = 5 # type: ignore +AMDGPU_SEM_OP_EXPORT_SEM = 6 # type: ignore +AMDGPU_VM_OP_RESERVE_VMID = 1 # type: ignore +AMDGPU_VM_OP_UNRESERVE_VMID = 2 # type: ignore +AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE = 1 # type: ignore +AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE = 2 # type: ignore +AMDGPU_GEM_USERPTR_READONLY = (1 << 0) # type: ignore +AMDGPU_GEM_USERPTR_ANONONLY = (1 << 1) # type: ignore +AMDGPU_GEM_USERPTR_VALIDATE = (1 << 2) # type: ignore +AMDGPU_GEM_USERPTR_REGISTER = (1 << 3) # type: ignore +AMDGPU_GEM_DGMA_IMPORT = 0 # type: ignore +AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR = 1 # type: ignore +AMDGPU_TILING_ARRAY_MODE_SHIFT = 0 # type: ignore +AMDGPU_TILING_ARRAY_MODE_MASK = 0xf # type: ignore +AMDGPU_TILING_PIPE_CONFIG_SHIFT = 4 # type: ignore +AMDGPU_TILING_PIPE_CONFIG_MASK = 0x1f # type: ignore +AMDGPU_TILING_TILE_SPLIT_SHIFT = 9 # type: ignore +AMDGPU_TILING_TILE_SPLIT_MASK = 0x7 # type: ignore +AMDGPU_TILING_MICRO_TILE_MODE_SHIFT = 12 # type: ignore +AMDGPU_TILING_MICRO_TILE_MODE_MASK = 0x7 # type: ignore +AMDGPU_TILING_BANK_WIDTH_SHIFT = 15 # type: ignore +AMDGPU_TILING_BANK_WIDTH_MASK = 0x3 # type: ignore +AMDGPU_TILING_BANK_HEIGHT_SHIFT = 17 # type: ignore +AMDGPU_TILING_BANK_HEIGHT_MASK = 0x3 # type: ignore +AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT = 19 # type: ignore +AMDGPU_TILING_MACRO_TILE_ASPECT_MASK = 0x3 # type: ignore +AMDGPU_TILING_NUM_BANKS_SHIFT = 21 # type: ignore +AMDGPU_TILING_NUM_BANKS_MASK = 0x3 # type: ignore +AMDGPU_TILING_SWIZZLE_MODE_SHIFT = 0 # type: ignore +AMDGPU_TILING_SWIZZLE_MODE_MASK = 0x1f # type: ignore +AMDGPU_TILING_DCC_OFFSET_256B_SHIFT = 5 # type: ignore +AMDGPU_TILING_DCC_OFFSET_256B_MASK = 0xFFFFFF # type: ignore +AMDGPU_TILING_DCC_PITCH_MAX_SHIFT = 29 # type: ignore +AMDGPU_TILING_DCC_PITCH_MAX_MASK = 0x3FFF # type: ignore +AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT = 43 # type: ignore +AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK = 0x1 # type: ignore +AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT = 44 # type: ignore +AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK = 0x1 # type: ignore +AMDGPU_TILING_SCANOUT_SHIFT = 63 # type: ignore +AMDGPU_TILING_SCANOUT_MASK = 0x1 # type: ignore +AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT = 0 # type: ignore +AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK = 0x7 # type: ignore +AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT = 3 # type: ignore +AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK = 0x3 # type: ignore +AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT = 5 # type: ignore +AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK = 0x7 # type: ignore +AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT = 8 # type: ignore +AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK = 0x3f # type: ignore +AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT = 14 # type: ignore +AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK = 0x1 # type: ignore +AMDGPU_TILING_GFX12_SCANOUT_SHIFT = 63 # type: ignore +AMDGPU_TILING_GFX12_SCANOUT_MASK = 0x1 # type: ignore +AMDGPU_GEM_METADATA_OP_SET_METADATA = 1 # type: ignore +AMDGPU_GEM_METADATA_OP_GET_METADATA = 2 # type: ignore +AMDGPU_GEM_OP_GET_GEM_CREATE_INFO = 0 # type: ignore +AMDGPU_GEM_OP_SET_PLACEMENT = 1 # type: ignore +AMDGPU_VA_OP_MAP = 1 # type: ignore +AMDGPU_VA_OP_UNMAP = 2 # type: ignore +AMDGPU_VA_OP_CLEAR = 3 # type: ignore +AMDGPU_VA_OP_REPLACE = 4 # type: ignore +AMDGPU_VM_DELAY_UPDATE = (1 << 0) # type: ignore +AMDGPU_VM_PAGE_READABLE = (1 << 1) # type: ignore +AMDGPU_VM_PAGE_WRITEABLE = (1 << 2) # type: ignore +AMDGPU_VM_PAGE_EXECUTABLE = (1 << 3) # type: ignore +AMDGPU_VM_PAGE_PRT = (1 << 4) # type: ignore +AMDGPU_VM_MTYPE_MASK = (0xf << 5) # type: ignore +AMDGPU_VM_MTYPE_DEFAULT = (0 << 5) # type: ignore +AMDGPU_VM_MTYPE_NC = (1 << 5) # type: ignore +AMDGPU_VM_MTYPE_WC = (2 << 5) # type: ignore +AMDGPU_VM_MTYPE_CC = (3 << 5) # type: ignore +AMDGPU_VM_MTYPE_UC = (4 << 5) # type: ignore +AMDGPU_VM_MTYPE_RW = (5 << 5) # type: ignore +AMDGPU_VM_PAGE_NOALLOC = (1 << 9) # type: ignore +AMDGPU_HW_IP_GFX = 0 # type: ignore +AMDGPU_HW_IP_COMPUTE = 1 # type: ignore +AMDGPU_HW_IP_DMA = 2 # type: ignore +AMDGPU_HW_IP_UVD = 3 # type: ignore +AMDGPU_HW_IP_VCE = 4 # type: ignore +AMDGPU_HW_IP_UVD_ENC = 5 # type: ignore +AMDGPU_HW_IP_VCN_DEC = 6 # type: ignore +AMDGPU_HW_IP_VCN_ENC = 7 # type: ignore +AMDGPU_HW_IP_VCN_JPEG = 8 # type: ignore +AMDGPU_HW_IP_VPE = 9 # type: ignore +AMDGPU_HW_IP_NUM = 10 # type: ignore +AMDGPU_HW_IP_INSTANCE_MAX_COUNT = 1 # type: ignore +AMDGPU_CHUNK_ID_IB = 0x01 # type: ignore +AMDGPU_CHUNK_ID_FENCE = 0x02 # type: ignore +AMDGPU_CHUNK_ID_DEPENDENCIES = 0x03 # type: ignore +AMDGPU_CHUNK_ID_SYNCOBJ_IN = 0x04 # type: ignore +AMDGPU_CHUNK_ID_SYNCOBJ_OUT = 0x05 # type: ignore +AMDGPU_CHUNK_ID_BO_HANDLES = 0x06 # type: ignore +AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES = 0x07 # type: ignore +AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT = 0x08 # type: ignore +AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL = 0x09 # type: ignore +AMDGPU_CHUNK_ID_CP_GFX_SHADOW = 0x0a # type: ignore +AMDGPU_IB_FLAG_CE = (1<<0) # type: ignore +AMDGPU_IB_FLAG_PREAMBLE = (1<<1) # type: ignore +AMDGPU_IB_FLAG_PREEMPT = (1<<2) # type: ignore +AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE = (1 << 3) # type: ignore +AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID = (1 << 4) # type: ignore +AMDGPU_IB_FLAGS_SECURE = (1 << 5) # type: ignore +AMDGPU_IB_FLAG_EMIT_MEM_SYNC = (1 << 6) # type: ignore +AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ = 0 # type: ignore +AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD = 1 # type: ignore +AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD = 2 # type: ignore +AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW = 0x1 # type: ignore +AMDGPU_IDS_FLAGS_FUSION = 0x1 # type: ignore +AMDGPU_IDS_FLAGS_PREEMPTION = 0x2 # type: ignore +AMDGPU_IDS_FLAGS_TMZ = 0x4 # type: ignore +AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD = 0x8 # type: ignore +AMDGPU_IDS_FLAGS_MODE_MASK = 0x300 # type: ignore +AMDGPU_IDS_FLAGS_MODE_SHIFT = 0x8 # type: ignore +AMDGPU_IDS_FLAGS_MODE_PF = 0x0 # type: ignore +AMDGPU_IDS_FLAGS_MODE_VF = 0x1 # type: ignore +AMDGPU_IDS_FLAGS_MODE_PT = 0x2 # type: ignore +AMDGPU_INFO_ACCEL_WORKING = 0x00 # type: ignore +AMDGPU_INFO_CRTC_FROM_ID = 0x01 # type: ignore +AMDGPU_INFO_HW_IP_INFO = 0x02 # type: ignore +AMDGPU_INFO_HW_IP_COUNT = 0x03 # type: ignore +AMDGPU_INFO_TIMESTAMP = 0x05 # type: ignore +AMDGPU_INFO_FW_VERSION = 0x0e # type: ignore +AMDGPU_INFO_FW_VCE = 0x1 # type: ignore +AMDGPU_INFO_FW_UVD = 0x2 # type: ignore +AMDGPU_INFO_FW_GMC = 0x03 # type: ignore +AMDGPU_INFO_FW_GFX_ME = 0x04 # type: ignore +AMDGPU_INFO_FW_GFX_PFP = 0x05 # type: ignore +AMDGPU_INFO_FW_GFX_CE = 0x06 # type: ignore +AMDGPU_INFO_FW_GFX_RLC = 0x07 # type: ignore +AMDGPU_INFO_FW_GFX_MEC = 0x08 # type: ignore +AMDGPU_INFO_FW_SMC = 0x0a # type: ignore +AMDGPU_INFO_FW_SDMA = 0x0b # type: ignore +AMDGPU_INFO_FW_SOS = 0x0c # type: ignore +AMDGPU_INFO_FW_ASD = 0x0d # type: ignore +AMDGPU_INFO_FW_VCN = 0x0e # type: ignore +AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL = 0x0f # type: ignore +AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM = 0x10 # type: ignore +AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM = 0x11 # type: ignore +AMDGPU_INFO_FW_DMCU = 0x12 # type: ignore +AMDGPU_INFO_FW_TA = 0x13 # type: ignore +AMDGPU_INFO_FW_DMCUB = 0x14 # type: ignore +AMDGPU_INFO_FW_TOC = 0x15 # type: ignore +AMDGPU_INFO_FW_CAP = 0x16 # type: ignore +AMDGPU_INFO_FW_GFX_RLCP = 0x17 # type: ignore +AMDGPU_INFO_FW_GFX_RLCV = 0x18 # type: ignore +AMDGPU_INFO_FW_MES_KIQ = 0x19 # type: ignore +AMDGPU_INFO_FW_MES = 0x1a # type: ignore +AMDGPU_INFO_FW_IMU = 0x1b # type: ignore +AMDGPU_INFO_FW_VPE = 0x1c # type: ignore +AMDGPU_INFO_NUM_BYTES_MOVED = 0x0f # type: ignore +AMDGPU_INFO_VRAM_USAGE = 0x10 # type: ignore +AMDGPU_INFO_GTT_USAGE = 0x11 # type: ignore +AMDGPU_INFO_GDS_CONFIG = 0x13 # type: ignore +AMDGPU_INFO_VRAM_GTT = 0x14 # type: ignore +AMDGPU_INFO_READ_MMR_REG = 0x15 # type: ignore +AMDGPU_INFO_DEV_INFO = 0x16 # type: ignore +AMDGPU_INFO_VIS_VRAM_USAGE = 0x17 # type: ignore +AMDGPU_INFO_NUM_EVICTIONS = 0x18 # type: ignore +AMDGPU_INFO_MEMORY = 0x19 # type: ignore +AMDGPU_INFO_VCE_CLOCK_TABLE = 0x1A # type: ignore +AMDGPU_INFO_VBIOS = 0x1B # type: ignore +AMDGPU_INFO_VBIOS_SIZE = 0x1 # type: ignore +AMDGPU_INFO_VBIOS_IMAGE = 0x2 # type: ignore +AMDGPU_INFO_VBIOS_INFO = 0x3 # type: ignore +AMDGPU_INFO_NUM_HANDLES = 0x1C # type: ignore +AMDGPU_INFO_SENSOR = 0x1D # type: ignore +AMDGPU_INFO_SENSOR_GFX_SCLK = 0x1 # type: ignore +AMDGPU_INFO_SENSOR_GFX_MCLK = 0x2 # type: ignore +AMDGPU_INFO_SENSOR_GPU_TEMP = 0x3 # type: ignore +AMDGPU_INFO_SENSOR_GPU_LOAD = 0x4 # type: ignore +AMDGPU_INFO_SENSOR_GPU_AVG_POWER = 0x5 # type: ignore +AMDGPU_INFO_SENSOR_VDDNB = 0x6 # type: ignore +AMDGPU_INFO_SENSOR_VDDGFX = 0x7 # type: ignore +AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK = 0x8 # type: ignore +AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK = 0x9 # type: ignore +AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK = 0xa # type: ignore +AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK = 0xb # type: ignore +AMDGPU_INFO_SENSOR_GPU_INPUT_POWER = 0xc # type: ignore +AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS = 0x1E # type: ignore +AMDGPU_INFO_VRAM_LOST_COUNTER = 0x1F # type: ignore +AMDGPU_INFO_RAS_ENABLED_FEATURES = 0x20 # type: ignore +AMDGPU_INFO_RAS_ENABLED_UMC = (1 << 0) # type: ignore +AMDGPU_INFO_RAS_ENABLED_SDMA = (1 << 1) # type: ignore +AMDGPU_INFO_RAS_ENABLED_GFX = (1 << 2) # type: ignore +AMDGPU_INFO_RAS_ENABLED_MMHUB = (1 << 3) # type: ignore +AMDGPU_INFO_RAS_ENABLED_ATHUB = (1 << 4) # type: ignore +AMDGPU_INFO_RAS_ENABLED_PCIE = (1 << 5) # type: ignore +AMDGPU_INFO_RAS_ENABLED_HDP = (1 << 6) # type: ignore +AMDGPU_INFO_RAS_ENABLED_XGMI = (1 << 7) # type: ignore +AMDGPU_INFO_RAS_ENABLED_DF = (1 << 8) # type: ignore +AMDGPU_INFO_RAS_ENABLED_SMN = (1 << 9) # type: ignore +AMDGPU_INFO_RAS_ENABLED_SEM = (1 << 10) # type: ignore +AMDGPU_INFO_RAS_ENABLED_MP0 = (1 << 11) # type: ignore +AMDGPU_INFO_RAS_ENABLED_MP1 = (1 << 12) # type: ignore +AMDGPU_INFO_RAS_ENABLED_FUSE = (1 << 13) # type: ignore +AMDGPU_INFO_VIDEO_CAPS = 0x21 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_DECODE = 0 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_ENCODE = 1 # type: ignore +AMDGPU_INFO_MAX_IBS = 0x22 # type: ignore +AMDGPU_INFO_GPUVM_FAULT = 0x23 # type: ignore +AMDGPU_INFO_UQ_FW_AREAS = 0x24 # type: ignore +AMDGPU_INFO_CAPABILITY = 0x50 # type: ignore +AMDGPU_INFO_VIRTUAL_RANGE = 0x51 # type: ignore +AMDGPU_CAPABILITY_PIN_MEM_FLAG = (1 << 0) # type: ignore +AMDGPU_CAPABILITY_DIRECT_GMA_FLAG = (1 << 1) # type: ignore +AMDGPU_INFO_MMR_SE_INDEX_SHIFT = 0 # type: ignore +AMDGPU_INFO_MMR_SE_INDEX_MASK = 0xff # type: ignore +AMDGPU_INFO_MMR_SH_INDEX_SHIFT = 8 # type: ignore +AMDGPU_INFO_MMR_SH_INDEX_MASK = 0xff # type: ignore +AMDGPU_VRAM_TYPE_UNKNOWN = 0 # type: ignore +AMDGPU_VRAM_TYPE_GDDR1 = 1 # type: ignore +AMDGPU_VRAM_TYPE_DDR2 = 2 # type: ignore +AMDGPU_VRAM_TYPE_GDDR3 = 3 # type: ignore +AMDGPU_VRAM_TYPE_GDDR4 = 4 # type: ignore +AMDGPU_VRAM_TYPE_GDDR5 = 5 # type: ignore +AMDGPU_VRAM_TYPE_HBM = 6 # type: ignore +AMDGPU_VRAM_TYPE_DDR3 = 7 # type: ignore +AMDGPU_VRAM_TYPE_DDR4 = 8 # type: ignore +AMDGPU_VRAM_TYPE_GDDR6 = 9 # type: ignore +AMDGPU_VRAM_TYPE_DDR5 = 10 # type: ignore +AMDGPU_VRAM_TYPE_LPDDR4 = 11 # type: ignore +AMDGPU_VRAM_TYPE_LPDDR5 = 12 # type: ignore +AMDGPU_VRAM_TYPE_HBM3E = 13 # type: ignore +AMDGPU_VRAM_TYPE_HBM_WIDTH = 4096 # type: ignore +AMDGPU_VCE_CLOCK_TABLE_ENTRIES = 6 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 = 0 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 = 1 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 = 2 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC = 3 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC = 4 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG = 5 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 = 6 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 = 7 # type: ignore +AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT = 8 # type: ignore +AMDGPU_VMHUB_TYPE_MASK = 0xff # type: ignore +AMDGPU_VMHUB_TYPE_SHIFT = 0 # type: ignore +AMDGPU_VMHUB_TYPE_GFX = 0 # type: ignore +AMDGPU_VMHUB_TYPE_MM0 = 1 # type: ignore +AMDGPU_VMHUB_TYPE_MM1 = 2 # type: ignore +AMDGPU_VMHUB_IDX_MASK = 0xff00 # type: ignore +AMDGPU_VMHUB_IDX_SHIFT = 8 # type: ignore +AMDGPU_FAMILY_UNKNOWN = 0 # type: ignore +AMDGPU_FAMILY_SI = 110 # type: ignore +AMDGPU_FAMILY_CI = 120 # type: ignore +AMDGPU_FAMILY_KV = 125 # type: ignore +AMDGPU_FAMILY_VI = 130 # type: ignore +AMDGPU_FAMILY_CZ = 135 # type: ignore +AMDGPU_FAMILY_AI = 141 # type: ignore +AMDGPU_FAMILY_RV = 142 # type: ignore +AMDGPU_FAMILY_NV = 143 # type: ignore +AMDGPU_FAMILY_VGH = 144 # type: ignore +AMDGPU_FAMILY_GC_11_0_0 = 145 # type: ignore +AMDGPU_FAMILY_YC = 146 # type: ignore +AMDGPU_FAMILY_GC_11_0_1 = 148 # type: ignore +AMDGPU_FAMILY_GC_10_3_6 = 149 # type: ignore +AMDGPU_FAMILY_GC_10_3_7 = 151 # type: ignore +AMDGPU_FAMILY_GC_11_5_0 = 150 # type: ignore +AMDGPU_FAMILY_GC_12_0_0 = 152 # type: ignore +AMDGPU_SUA_APERTURE_PRIVATE = 1 # type: ignore +AMDGPU_SUA_APERTURE_SHARED = 2 # type: ignore +AMDGPU_FREESYNC_FULLSCREEN_ENTER = 1 # type: ignore +AMDGPU_FREESYNC_FULLSCREEN_EXIT = 2 # type: ignore \ No newline at end of file diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index c8cb3d0299..b58689f61e 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -11,7 +11,7 @@ from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, Profil from tinygrad.helpers import VIZ, AMD_CC, AMD_LLVM, ceildiv from tinygrad.renderer.cstyle import AMDHIPRenderer, AMDHIPCCRenderer from tinygrad.renderer.llvmir import AMDLLVMRenderer -from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd +from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd, amdgpu_drm from tinygrad.runtime.autogen.am import am from tinygrad.runtime.support.elf import elf_loader from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager @@ -181,11 +181,13 @@ class AMDComputeQueue(HWQueue): for xcc in range(s.xcc): with self.pred_exec(xcc_mask=1 << xcc): for inst, se_idx, sa_idx, wgp_idx in itertools.product(range(s.inst), range(s.se), range(s.sa), range(s.wgp)): + loff = next(offset) + if s.wgp > 1 and not self.dev.iface.is_wgp_active(xcc, se_idx, sa_idx, wgp_idx): continue self.set_grbm(**({'instance':inst} if s.inst > 1 else ({'se':se_idx}|({'sh':sa_idx, 'wgp':wgp_idx} if self.dev.target[0] != 9 else {})))) # Copy counter to memory (src_sel = perf, dst_sel = tc_l2) lo, hi = getattr(self.gc, f'{s.regsample}_LO'), getattr(self.gc, f'{s.regsample}_HI', None) - self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+(loff:=next(offset)))) + self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+loff)) if hi is not None: self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, hi.addr[0], 0, *data64_le(buf.va_addr+loff+4)) return self.pmc_reset_counters(en=True) @@ -806,6 +808,13 @@ class KFDIface: else: raise RuntimeError("PMC/SQTT requires stable power state: run `amd-smi set -l stable_std` for KFD iface") + @functools.cached_property + def drm_dev_info(self) -> amdgpu_drm.struct_drm_amdgpu_info_device: + amdgpu_drm.DRM_IOCTL_AMDGPU_INFO(self.drm_fd, query=amdgpu_drm.AMDGPU_INFO_DEV_INFO, + return_pointer=ctypes.addressof(inf:=amdgpu_drm.struct_drm_amdgpu_info_device()), return_size=ctypes.sizeof(inf)) + return inf + def is_wgp_active(self, xcc, se, sa, wgp) -> bool: return ((self.drm_dev_info.cu_bitmap[se % 4][sa + (se // 4) * 2] >> (2 * wgp)) & 0x3) == 0x3 + class PCIIface(PCIIfaceBase): gpus:ClassVar[list[str]] = [] @@ -816,6 +825,7 @@ class PCIIface(PCIIfaceBase): self.pci_dev.write_config(pci.PCI_COMMAND, self.pci_dev.read_config(pci.PCI_COMMAND, 2) | pci.PCI_COMMAND_MASTER, 2) def require_profile_mode(self): return True + def is_wgp_active(self, xcc, se, sa, wgp) -> bool: return True # TODO: account for WGP disablement on some asics. def _setup_adev(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None): self.dev_impl:AMDev = AMDev(pci_dev, dma_regions)