From 8cd22df2dd24bc72a9a1ad324e5430d6c442f41c Mon Sep 17 00:00:00 2001
From: nimlgen <138685161+nimlgen@users.noreply.github.com>
Date: Fri, 23 Jan 2026 00:08:45 +0300
Subject: [PATCH] amd: alive wgps (#14149)

* amd: disabled wgps

* l

* wgp

* uoops

* mockgpu

* drm

* ad this

* fi

* reg
---
 .github/workflows/autogen.yml          |    4 +-
 extra/hip_gpu_driver/amdgpu_drm.h      | 1740 ++++++++++++++++++++++++
 test/mockgpu/amd/amddriver.py          |   11 +
 tinygrad/runtime/autogen/__init__.py   |    1 +
 tinygrad/runtime/autogen/amdgpu_drm.py | 1593 ++++++++++++++++++++++
 tinygrad/runtime/ops_amd.py            |   14 +-
 6 files changed, 3359 insertions(+), 4 deletions(-)
 create mode 100644 extra/hip_gpu_driver/amdgpu_drm.h
 create mode 100644 tinygrad/runtime/autogen/amdgpu_drm.py

diff --git a/.github/workflows/autogen.yml b/.github/workflows/autogen.yml
index 874ea0eb14..486c2b5e35 100644
--- a/.github/workflows/autogen.yml
+++ b/.github/workflows/autogen.yml
@@ -40,13 +40,13 @@ jobs:
         mesa: 'true'
         pydeps: 'pyyaml mako'
     - name: Install autogen support packages
-      run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev
+      run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev libdrm-dev
     - name: Regenerate autogen files
       run: |
         find tinygrad/runtime/autogen -type f -name "*.py" -not -name "__init__.py" -not -name "comgr_3.py" -not -name "metal.py" -not -name "iokit.py" -not -name "corefoundation.py" -not -name "libclang.py" -delete
         python3 -c "from tinygrad.runtime.autogen import opencl"
         python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_570, nv_580, nv"
-        python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd"
+        python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd, amdgpu_drm"
         python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v13_0_6, smu_v14_0_2"
         python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio"
         python3 -c "from tinygrad.runtime.autogen import llvm"
diff --git a/extra/hip_gpu_driver/amdgpu_drm.h b/extra/hip_gpu_driver/amdgpu_drm.h
new file mode 100644
index 0000000000..f3223c05f7
--- /dev/null
+++ b/extra/hip_gpu_driver/amdgpu_drm.h
@@ -0,0 +1,1740 @@
+/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*-
+ *
+ * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kevin E. Martin <martin@valinux.com>
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __AMDGPU_DRM_H__
+#define __AMDGPU_DRM_H__
+
+#include <drm/drm.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_AMDGPU_GEM_CREATE		0x00
+#define DRM_AMDGPU_GEM_MMAP		0x01
+#define DRM_AMDGPU_CTX			0x02
+#define DRM_AMDGPU_BO_LIST		0x03
+#define DRM_AMDGPU_CS			0x04
+#define DRM_AMDGPU_INFO			0x05
+#define DRM_AMDGPU_GEM_METADATA		0x06
+#define DRM_AMDGPU_GEM_WAIT_IDLE	0x07
+#define DRM_AMDGPU_GEM_VA		0x08
+#define DRM_AMDGPU_WAIT_CS		0x09
+#define DRM_AMDGPU_GEM_OP		0x10
+#define DRM_AMDGPU_GEM_USERPTR		0x11
+#define DRM_AMDGPU_WAIT_FENCES		0x12
+#define DRM_AMDGPU_VM			0x13
+#define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
+#define DRM_AMDGPU_SCHED		0x15
+#define DRM_AMDGPU_USERQ		0x16
+#define DRM_AMDGPU_USERQ_SIGNAL		0x17
+#define DRM_AMDGPU_USERQ_WAIT		0x18
+/* not upstream */
+#define DRM_AMDGPU_GEM_DGMA		0x5c
+
+/* hybrid specific ioctls */
+#define DRM_AMDGPU_SEM			0x5b
+
+#define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
+#define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
+#define DRM_IOCTL_AMDGPU_CTX		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx)
+#define DRM_IOCTL_AMDGPU_BO_LIST	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list)
+#define DRM_IOCTL_AMDGPU_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs)
+#define DRM_IOCTL_AMDGPU_INFO		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info)
+#define DRM_IOCTL_AMDGPU_GEM_METADATA	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata)
+#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle)
+#define DRM_IOCTL_AMDGPU_GEM_VA		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va)
+#define DRM_IOCTL_AMDGPU_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
+#define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
+#define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+#define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
+#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
+#define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
+#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
+#define DRM_IOCTL_AMDGPU_USERQ_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
+
+#define DRM_IOCTL_AMDGPU_GEM_DGMA	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma)
+
+/**
+ * DOC: memory domains
+ *
+ * %AMDGPU_GEM_DOMAIN_CPU	System memory that is not GPU accessible.
+ * Memory in this pool could be swapped out to disk if there is pressure.
+ *
+ * %AMDGPU_GEM_DOMAIN_GTT	GPU accessible system memory, mapped into the
+ * GPU's virtual address space via gart. Gart memory linearizes non-contiguous
+ * pages of system memory, allowing the GPU to access system memory in a
+ * linearized fashion.
+ *
+ * %AMDGPU_GEM_DOMAIN_VRAM	Local video memory. For APUs, it is memory
+ * carved out by the BIOS.
+ *
+ * %AMDGPU_GEM_DOMAIN_GDS	Global on-chip data storage used to share data
+ * across shader threads.
+ *
+ * %AMDGPU_GEM_DOMAIN_GWS	Global wave sync, used to synchronize the
+ * execution of all the waves on a device.
+ *
+ * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
+ * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL	Doorbell. It is an MMIO region for
+ * signalling user mode queues.
+ */
+/* hybrid specific ioctls */
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
+
+#define AMDGPU_GEM_DOMAIN_CPU		0x1
+#define AMDGPU_GEM_DOMAIN_GTT		0x2
+#define AMDGPU_GEM_DOMAIN_VRAM		0x4
+#define AMDGPU_GEM_DOMAIN_GDS		0x8
+#define AMDGPU_GEM_DOMAIN_GWS		0x10
+#define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL	0x40
+#define AMDGPU_GEM_DOMAIN_DGMA		0x400
+#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT	0x800
+#define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
+					 AMDGPU_GEM_DOMAIN_GTT | \
+					 AMDGPU_GEM_DOMAIN_VRAM | \
+					 AMDGPU_GEM_DOMAIN_GDS | \
+					 AMDGPU_GEM_DOMAIN_GWS | \
+					 AMDGPU_GEM_DOMAIN_OA |\
+					 AMDGPU_GEM_DOMAIN_DOORBELL |\
+					 AMDGPU_GEM_DOMAIN_DGMA |\
+					 AMDGPU_GEM_DOMAIN_DGMA_IMPORT)
+
+/* Flag that CPU access will be required for the case of VRAM domain */
+#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
+/* Flag that CPU access will not work, this VRAM domain is invisible */
+#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS		(1 << 1)
+/* Flag that USWC attributes should be used for GTT */
+#define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
+/* Flag that the memory should be in VRAM and cleared */
+#define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
+/* Flag that allocating the BO should use linear VRAM */
+#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
+/* Flag that BO sharing will be explicitly synchronized */
+#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
+/* Flag that indicates allocating MQD gart on GFX9, where the mtype
+ * for the second page onward should be set to NC. It should never
+ * be used by user space applications.
+ */
+#define AMDGPU_GEM_CREATE_CP_MQD_GFX9		(1 << 8)
+/* Flag that BO may contain sensitive data that must be wiped before
+ * releasing the memory
+ */
+#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE	(1 << 9)
+/* Flag that BO will be encrypted and that the TMZ bit should be
+ * set in the PTEs when mapping this buffer via GPUVM or
+ * accessing it with various hw blocks
+ */
+#define AMDGPU_GEM_CREATE_ENCRYPTED		(1 << 10)
+/* Flag that BO will be used only in preemptible context, which does
+ * not require GTT memory accounting
+ */
+#define AMDGPU_GEM_CREATE_PREEMPTIBLE		(1 << 11)
+/* Flag that BO can be discarded under memory pressure without keeping the
+ * content.
+ */
+#define AMDGPU_GEM_CREATE_DISCARDABLE		(1 << 12)
+/* Flag that BO is shared coherently between multiple devices or CPU threads.
+ * May depend on GPU instructions to flush caches to system scope explicitly.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_COHERENT		(1 << 13)
+/* Flag that BO should not be cached by GPU. Coherent without having to flush
+ * GPU caches explicitly
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_UNCACHED		(1 << 14)
+/* Flag that BO should be coherent across devices when using device-level
+ * atomics. May depend on GPU instructions to flush caches to device scope
+ * explicitly, promoting them to system scope automatically.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_EXT_COHERENT		(1 << 15)
+/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */
+#define AMDGPU_GEM_CREATE_GFX12_DCC		(1 << 16)
+
+/* hybrid specific */
+/* Flag that the memory should be in SPARSE resource */
+#define AMDGPU_GEM_CREATE_SPARSE		(1ULL << 29)
+/* Flag that the memory allocation should be from top of domain */
+#define AMDGPU_GEM_CREATE_TOP_DOWN		(1ULL << 30)
+/* Flag that the memory allocation should be pinned */
+#define AMDGPU_GEM_CREATE_NO_EVICT		(1ULL << 31)
+
+struct drm_amdgpu_gem_create_in  {
+	/** the requested memory size */
+	__u64 bo_size;
+	/** physical start_addr alignment in bytes for some HW requirements */
+	__u64 alignment;
+	/** the requested memory domains */
+	__u64 domains;
+	/** allocation flags */
+	__u64 domain_flags;
+};
+
+struct drm_amdgpu_gem_create_out  {
+	/** returned GEM object handle */
+	__u32 handle;
+	__u32 _pad;
+};
+
+union drm_amdgpu_gem_create {
+	struct drm_amdgpu_gem_create_in		in;
+	struct drm_amdgpu_gem_create_out	out;
+};
+
+/** Opcode to create new residency list.  */
+#define AMDGPU_BO_LIST_OP_CREATE	0
+/** Opcode to destroy previously created residency list */
+#define AMDGPU_BO_LIST_OP_DESTROY	1
+/** Opcode to update resource information in the list */
+#define AMDGPU_BO_LIST_OP_UPDATE	2
+
+struct drm_amdgpu_bo_list_in {
+	/** Type of operation */
+	__u32 operation;
+	/** Handle of list or 0 if we want to create one */
+	__u32 list_handle;
+	/** Number of BOs in list  */
+	__u32 bo_number;
+	/** Size of each element describing BO */
+	__u32 bo_info_size;
+	/** Pointer to array describing BOs */
+	__u64 bo_info_ptr;
+};
+
+struct drm_amdgpu_bo_list_entry {
+	/** Handle of BO */
+	__u32 bo_handle;
+	/** New (if specified) BO priority to be used during migration */
+	__u32 bo_priority;
+};
+
+struct drm_amdgpu_bo_list_out {
+	/** Handle of resource list  */
+	__u32 list_handle;
+	__u32 _pad;
+};
+
+union drm_amdgpu_bo_list {
+	struct drm_amdgpu_bo_list_in in;
+	struct drm_amdgpu_bo_list_out out;
+};
+
+/* context related */
+#define AMDGPU_CTX_OP_ALLOC_CTX	1
+#define AMDGPU_CTX_OP_FREE_CTX	2
+#define AMDGPU_CTX_OP_QUERY_STATE	3
+#define AMDGPU_CTX_OP_QUERY_STATE2	4
+#define AMDGPU_CTX_OP_GET_STABLE_PSTATE	5
+#define AMDGPU_CTX_OP_SET_STABLE_PSTATE	6
+
+/* GPU reset status */
+#define AMDGPU_CTX_NO_RESET		0
+/* this context caused it */
+#define AMDGPU_CTX_GUILTY_RESET		1
+/* some other context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET	2
+/* unknown cause */
+#define AMDGPU_CTX_UNKNOWN_RESET	3
+
+/* indicate gpu reset occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET    (1<<0)
+/* indicate vram lost occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
+/* indicate some job from this context once caused a gpu hang */
+#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY   (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE   (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE   (1<<4)
+/* indicate that the reset hasn't completed yet */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5)
+
+/* Context priority level */
+#define AMDGPU_CTX_PRIORITY_UNSET       -2048
+#define AMDGPU_CTX_PRIORITY_VERY_LOW    -1023
+#define AMDGPU_CTX_PRIORITY_LOW         -512
+#define AMDGPU_CTX_PRIORITY_NORMAL      0
+/*
+ * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires
+ * CAP_SYS_NICE or DRM_MASTER
+*/
+#define AMDGPU_CTX_PRIORITY_HIGH        512
+#define AMDGPU_CTX_PRIORITY_VERY_HIGH   1023
+
+/* select a stable profiling pstate for perfmon tools */
+#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK  0xf
+#define AMDGPU_CTX_STABLE_PSTATE_NONE  0
+#define AMDGPU_CTX_STABLE_PSTATE_STANDARD  1
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK  2
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
+#define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+
+struct drm_amdgpu_ctx_in {
+	/** AMDGPU_CTX_OP_* */
+	__u32	op;
+	/** Flags */
+	__u32	flags;
+	__u32	ctx_id;
+	/** AMDGPU_CTX_PRIORITY_* */
+	__s32	priority;
+};
+
+union drm_amdgpu_ctx_out {
+		struct {
+			__u32	ctx_id;
+			__u32	_pad;
+		} alloc;
+
+		struct {
+			/** For future use, no flags defined so far */
+			__u64	flags;
+			/** Number of resets caused by this context so far. */
+			__u32	hangs;
+			/** Reset status since the last call of the ioctl. */
+			__u32	reset_status;
+		} state;
+
+		struct {
+			__u32	flags;
+			__u32	_pad;
+		} pstate;
+};
+
+union drm_amdgpu_ctx {
+	struct drm_amdgpu_ctx_in in;
+	union drm_amdgpu_ctx_out out;
+};
+
+/* user queue IOCTL operations */
+#define AMDGPU_USERQ_OP_CREATE	1
+#define AMDGPU_USERQ_OP_FREE	2
+
+/* queue priority levels */
+/* low < normal low < normal high < high */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK  0x3
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
+/* for queues that need access to protected content */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE  (1 << 2)
+
+/*
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: user is expected
+ *  to set all fields, except the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ *  to be set is 'queue_id', everything else is ignored.
+ */
+struct drm_amdgpu_userq_in {
+	/** AMDGPU_USERQ_OP_* */
+	__u32	op;
+	/** Queue id passed for operation USERQ_OP_FREE */
+	__u32	queue_id;
+	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+	__u32   ip_type;
+	/**
+	 * @doorbell_handle: the handle of doorbell GEM object
+	 * associated with this userqueue client.
+	 */
+	__u32   doorbell_handle;
+	/**
+	 * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+	 * Kernel will generate absolute doorbell offset using doorbell_handle
+	 * and doorbell_offset in the doorbell bo.
+	 */
+	__u32   doorbell_offset;
+	/**
+	 * @flags: flags used for queue parameters
+	 */
+	__u32 flags;
+	/**
+	 * @queue_va: Virtual address of the GPU memory which holds the queue
+	 * object. The queue holds the workload packets.
+	 */
+	__u64   queue_va;
+	/**
+	 * @queue_size: Size of the queue in bytes, this needs to be 256-byte
+	 * aligned.
+	 */
+	__u64   queue_size;
+	/**
+	 * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
+	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
+	 */
+	__u64   rptr_va;
+	/**
+	 * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
+	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
+	 *
+	 * Queue, RPTR and WPTR can come from the same object, as long as the size
+	 * and alignment related requirements are met.
+	 */
+	__u64   wptr_va;
+	/**
+	 * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+	 * the GPU to uniquely define and identify a usermode queue.
+	 *
+	 * MQD data can be of different size for different GPU IP/engine and
+	 * their respective versions/revisions, so this points to a __u64 *
+	 * which holds IP specific MQD of this usermode queue.
+	 */
+	__u64 mqd;
+	/**
+	 * @mqd_size: size of MQD data in bytes, it must match the MQD structure
+	 * size of the respective engine/revision defined in UAPI for ex, for
+	 * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
+	 */
+	__u64 mqd_size;
+};
+
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out {
+	/**
+	 * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+	 * queue ID to represent the newly created userqueue in the system, otherwise
+	 * it should be ignored.
+	 */
+	__u32	queue_id;
+	__u32 _pad;
+};
+
+union drm_amdgpu_userq {
+	struct drm_amdgpu_userq_in in;
+	struct drm_amdgpu_userq_out out;
+};
+
+/* GFX V11 IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_gfx11 {
+	/**
+	 * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   shadow_va;
+	/**
+	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   csa_va;
+};
+
+/* GFX V11 SDMA IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_sdma_gfx11 {
+	/**
+	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+	 * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL
+	 * to get the size.
+	 */
+	__u64   csa_va;
+};
+
+/* GFX V11 Compute IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_compute_gfx11 {
+	/**
+	 * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
+	 * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL
+	 * to get the size.
+	 */
+	__u64   eop_va;
+};
+
+/* userq signal/wait ioctl */
+struct drm_amdgpu_userq_signal {
+	/**
+	 * @queue_id: Queue handle used by the userq fence creation function
+	 * to retrieve the WPTR.
+	 */
+	__u32	queue_id;
+	__u32	pad;
+	/**
+	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
+	 * job to be signaled.
+	 */
+	__u64	syncobj_handles;
+	/**
+	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
+	 * @syncobj_handles.
+	 */
+	__u64	num_syncobj_handles;
+	/**
+	 * @bo_read_handles: The list of BO handles that the submitted user queue job
+	 * is using for read only. This will update BO fences in the kernel.
+	 */
+	__u64	bo_read_handles;
+	/**
+	 * @bo_write_handles: The list of BO handles that the submitted user queue job
+	 * is using for write only. This will update BO fences in the kernel.
+	 */
+	__u64	bo_write_handles;
+	/**
+	 * @num_bo_read_handles: A count that represents the number of read BO handles in
+	 * @bo_read_handles.
+	 */
+	__u32	num_bo_read_handles;
+	/**
+	 * @num_bo_write_handles: A count that represents the number of write BO handles in
+	 * @bo_write_handles.
+	 */
+	__u32	num_bo_write_handles;
+};
+
+struct drm_amdgpu_userq_fence_info {
+	/**
+	 * @va: A gpu address allocated for each queue which stores the
+	 * read pointer (RPTR) value.
+	 */
+	__u64	va;
+	/**
+	 * @value: A 64 bit value represents the write pointer (WPTR) of the
+	 * queue commands which compared with the RPTR value to signal the
+	 * fences.
+	 */
+	__u64	value;
+};
+
+struct drm_amdgpu_userq_wait {
+	/**
+	 * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the
+	 * wait queue and maintain the fence driver references in it.
+	 */
+	__u32	waitq_id;
+	__u32	pad;
+	/**
+	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	syncobj_handles;
+	/**
+	 * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
+	 * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
+	 */
+	__u64	syncobj_timeline_handles;
+	/**
+	 * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
+	 * user queue job for the corresponding @syncobj_timeline_handles.
+	 */
+	__u64	syncobj_timeline_points;
+	/**
+	 * @bo_read_handles: The list of read BO handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	bo_read_handles;
+	/**
+	 * @bo_write_handles: The list of write BO handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	bo_write_handles;
+	/**
+	 * @num_syncobj_timeline_handles: A count that represents the number of timeline
+	 * syncobj handles in @syncobj_timeline_handles.
+	 */
+	__u16	num_syncobj_timeline_handles;
+	/**
+	 * @num_fences: This field can be used both as input and output. As input it defines
+	 * the maximum number of fences that can be returned and as output it will specify
+	 * how many fences were actually returned from the ioctl.
+	 */
+	__u16	num_fences;
+	/**
+	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
+	 * @syncobj_handles.
+	 */
+	__u32	num_syncobj_handles;
+	/**
+	 * @num_bo_read_handles: A count that represents the number of read BO handles in
+	 * @bo_read_handles.
+	 */
+	__u32	num_bo_read_handles;
+	/**
+	 * @num_bo_write_handles: A count that represents the number of write BO handles in
+	 * @bo_write_handles.
+	 */
+	__u32	num_bo_write_handles;
+	/**
+	 * @out_fences: The field is a return value from the ioctl containing the list of
+	 * address/value pairs to wait for.
+	 */
+	__u64	out_fences;
+};
+
+/* sem related */
+#define AMDGPU_SEM_OP_CREATE_SEM        1
+#define AMDGPU_SEM_OP_WAIT_SEM	        2
+#define AMDGPU_SEM_OP_SIGNAL_SEM        3
+#define AMDGPU_SEM_OP_DESTROY_SEM       4
+#define AMDGPU_SEM_OP_IMPORT_SEM	5
+#define AMDGPU_SEM_OP_EXPORT_SEM	6
+
+struct drm_amdgpu_sem_in {
+	/** AMDGPU_SEM_OP_* */
+	uint32_t	op;
+	uint32_t        handle;
+	uint32_t	ctx_id;
+	uint32_t        ip_type;
+	uint32_t        ip_instance;
+	uint32_t        ring;
+	uint64_t        seq;
+};
+
+union drm_amdgpu_sem_out {
+	int32_t         fd;
+	uint32_t	handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	union drm_amdgpu_sem_out out;
+};
+
+/* vm ioctl */
+#define AMDGPU_VM_OP_RESERVE_VMID	1
+#define AMDGPU_VM_OP_UNRESERVE_VMID	2
+
+struct drm_amdgpu_vm_in {
+	/** AMDGPU_VM_OP_* */
+	__u32	op;
+	__u32	flags;
+};
+
+struct drm_amdgpu_vm_out {
+	/** For future use, no flags defined so far */
+	__u64	flags;
+};
+
+union drm_amdgpu_vm {
+	struct drm_amdgpu_vm_in in;
+	struct drm_amdgpu_vm_out out;
+};
+
+/* sched ioctl */
+#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE	1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE	2
+
+struct drm_amdgpu_sched_in {
+	/* AMDGPU_SCHED_OP_* */
+	__u32	op;
+	__u32	fd;
+	/** AMDGPU_CTX_PRIORITY_* */
+	__s32	priority;
+	__u32   ctx_id;
+};
+
+union drm_amdgpu_sched {
+	struct drm_amdgpu_sched_in in;
+};
+
+/*
+ * This is not a reliable API and you should expect it to fail for any
+ * number of reasons and have fallback path that do not use userptr to
+ * perform any operation.
+ */
+#define AMDGPU_GEM_USERPTR_READONLY	(1 << 0)
+#define AMDGPU_GEM_USERPTR_ANONONLY	(1 << 1)
+#define AMDGPU_GEM_USERPTR_VALIDATE	(1 << 2)
+#define AMDGPU_GEM_USERPTR_REGISTER	(1 << 3)
+
+struct drm_amdgpu_gem_userptr {
+	__u64		addr;
+	__u64		size;
+	/* AMDGPU_GEM_USERPTR_* */
+	__u32		flags;
+	/* Resulting GEM handle */
+	__u32		handle;
+};
+
+#define AMDGPU_GEM_DGMA_IMPORT			0
+#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR		1
+struct drm_amdgpu_gem_dgma {
+	__u64		addr;
+	__u64		size;
+	__u32		op;
+	__u32		handle;
+};
+
+/* SI-CI-VI: */
+/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
+#define AMDGPU_TILING_ARRAY_MODE_SHIFT			0
+#define AMDGPU_TILING_ARRAY_MODE_MASK			0xf
+#define AMDGPU_TILING_PIPE_CONFIG_SHIFT			4
+#define AMDGPU_TILING_PIPE_CONFIG_MASK			0x1f
+#define AMDGPU_TILING_TILE_SPLIT_SHIFT			9
+#define AMDGPU_TILING_TILE_SPLIT_MASK			0x7
+#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT		12
+#define AMDGPU_TILING_MICRO_TILE_MODE_MASK		0x7
+#define AMDGPU_TILING_BANK_WIDTH_SHIFT			15
+#define AMDGPU_TILING_BANK_WIDTH_MASK			0x3
+#define AMDGPU_TILING_BANK_HEIGHT_SHIFT			17
+#define AMDGPU_TILING_BANK_HEIGHT_MASK			0x3
+#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT		19
+#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK		0x3
+#define AMDGPU_TILING_NUM_BANKS_SHIFT			21
+#define AMDGPU_TILING_NUM_BANKS_MASK			0x3
+
+/* GFX9 - GFX11: */
+#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT		0
+#define AMDGPU_TILING_SWIZZLE_MODE_MASK			0x1f
+#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT		5
+#define AMDGPU_TILING_DCC_OFFSET_256B_MASK		0xFFFFFF
+#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT		29
+#define AMDGPU_TILING_DCC_PITCH_MAX_MASK		0x3FFF
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT		43
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK		0x1
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT	44
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK		0x1
+#define AMDGPU_TILING_SCANOUT_SHIFT			63
+#define AMDGPU_TILING_SCANOUT_MASK			0x1
+
+/* GFX12 and later: */
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT			0
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK			0x7
+/* These are DCC recompression settings for memory management: */
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT	3
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK	0x3 /* 0:64B, 1:128B, 2:256B */
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT		5
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK		0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT		8
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK		0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
+/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
+ * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */
+#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT	14
+#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK	0x1
+/* bit gap */
+#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT			63
+#define AMDGPU_TILING_GFX12_SCANOUT_MASK			0x1
+
+/* Set/Get helpers for tiling flags. */
+#define AMDGPU_TILING_SET(field, value) \
+	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
+#define AMDGPU_TILING_GET(value, field) \
+	(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
+
+#define AMDGPU_GEM_METADATA_OP_SET_METADATA                  1
+#define AMDGPU_GEM_METADATA_OP_GET_METADATA                  2
+
+/** The same structure is shared for input/output */
+struct drm_amdgpu_gem_metadata {
+	/** GEM Object handle */
+	__u32	handle;
+	/** Whether to get or set metadata (AMDGPU_GEM_METADATA_OP_*) */
+	__u32	op;
+	struct {
+		/** For future use, no flags defined so far */
+		__u64	flags;
+		/** family specific tiling info */
+		__u64	tiling_info;
+		__u32	data_size_bytes;
+		__u32	data[64];
+	} data;
+};
+
+struct drm_amdgpu_gem_mmap_in {
+	/** the GEM object handle */
+	__u32 handle;
+	__u32 _pad;
+};
+
+struct drm_amdgpu_gem_mmap_out {
+	/** mmap offset from the vma offset manager */
+	__u64 addr_ptr;
+};
+
+union drm_amdgpu_gem_mmap {
+	struct drm_amdgpu_gem_mmap_in   in;
+	struct drm_amdgpu_gem_mmap_out out;
+};
+
+struct drm_amdgpu_gem_wait_idle_in {
+	/** GEM object handle */
+	__u32 handle;
+	/** For future use, no flags defined so far */
+	__u32 flags;
+	/** Absolute timeout to wait */
+	__u64 timeout;
+};
+
+struct drm_amdgpu_gem_wait_idle_out {
+	/** BO status:  0 - BO is idle, 1 - BO is busy */
+	__u32 status;
+	/** Returned current memory domain */
+	__u32 domain;
+};
+
+union drm_amdgpu_gem_wait_idle {
+	struct drm_amdgpu_gem_wait_idle_in  in;
+	struct drm_amdgpu_gem_wait_idle_out out;
+};
+
+struct drm_amdgpu_wait_cs_in {
+	/* Command submission handle
+         * handle equals 0 means none to wait for
+         * handle equals ~0ull means wait for the latest sequence number
+         */
+	__u64 handle;
+	/** Absolute timeout to wait */
+	__u64 timeout;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
+};
+
+struct drm_amdgpu_wait_cs_out {
+	/** CS status:  0 - CS completed, 1 - CS still busy */
+	__u64 status;
+};
+
+union drm_amdgpu_wait_cs {
+	struct drm_amdgpu_wait_cs_in in;
+	struct drm_amdgpu_wait_cs_out out;
+};
+
+struct drm_amdgpu_fence {
+	__u32 ctx_id;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u64 seq_no;
+};
+
+struct drm_amdgpu_wait_fences_in {
+	/** This points to uint64_t * which points to fences */
+	__u64 fences;
+	__u32 fence_count;
+	__u32 wait_all;
+	__u64 timeout_ns;
+};
+
+struct drm_amdgpu_wait_fences_out {
+	__u32 status;
+	__u32 first_signaled;
+};
+
+union drm_amdgpu_wait_fences {
+	struct drm_amdgpu_wait_fences_in in;
+	struct drm_amdgpu_wait_fences_out out;
+};
+
+#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
+#define AMDGPU_GEM_OP_SET_PLACEMENT		1
+
+/* Sets or returns a value associated with a buffer. */
+struct drm_amdgpu_gem_op {
+	/** GEM object handle */
+	__u32	handle;
+	/** AMDGPU_GEM_OP_* */
+	__u32	op;
+	/** Input or return value */
+	__u64	value;
+};
+
+#define AMDGPU_VA_OP_MAP			1
+#define AMDGPU_VA_OP_UNMAP			2
+#define AMDGPU_VA_OP_CLEAR			3
+#define AMDGPU_VA_OP_REPLACE			4
+
+/* Delay the page table update till the next CS */
+#define AMDGPU_VM_DELAY_UPDATE		(1 << 0)
+
+/* Mapping flags */
+/* readable mapping */
+#define AMDGPU_VM_PAGE_READABLE		(1 << 1)
+/* writable mapping */
+#define AMDGPU_VM_PAGE_WRITEABLE	(1 << 2)
+/* executable mapping, new for VI */
+#define AMDGPU_VM_PAGE_EXECUTABLE	(1 << 3)
+/* partially resident texture */
+#define AMDGPU_VM_PAGE_PRT		(1 << 4)
+/* MTYPE flags use bit 5 to 8 */
+#define AMDGPU_VM_MTYPE_MASK		(0xf << 5)
+/* Default MTYPE. Pre-AI must use this.  Recommended for newer ASICs. */
+#define AMDGPU_VM_MTYPE_DEFAULT		(0 << 5)
+/* Use Non Coherent MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_NC		(1 << 5)
+/* Use Write Combine MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_WC		(2 << 5)
+/* Use Cache Coherent MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_CC		(3 << 5)
+/* Use UnCached MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_UC		(4 << 5)
+/* Use Read Write MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_RW		(5 << 5)
+/* don't allocate MALL */
+#define AMDGPU_VM_PAGE_NOALLOC		(1 << 9)
+
+struct drm_amdgpu_gem_va {
+	/** GEM object handle */
+	__u32 handle;
+	__u32 _pad;
+	/** AMDGPU_VA_OP_* */
+	__u32 operation;
+	/** AMDGPU_VM_PAGE_* */
+	__u32 flags;
+	/** va address to assign. Must be correctly aligned. */
+	__u64 va_address;
+	/** Specify offset inside of BO to assign. Must be correctly aligned.*/
+	__u64 offset_in_bo;
+	/** Specify mapping size. Must be correctly aligned. */
+	__u64 map_size;
+	/**
+	 * vm_timeline_point is a sequence number used to add new timeline point.
+	 */
+	__u64 vm_timeline_point;
+	/**
+	 * The vm page table update fence is installed in given vm_timeline_syncobj_out
+	 * at vm_timeline_point.
+	 */
+	__u32 vm_timeline_syncobj_out;
+	/** the number of syncobj handles in @input_fence_syncobj_handles */
+	__u32 num_syncobj_handles;
+	/** Array of sync object handle to wait for given input fences */
+	__u64 input_fence_syncobj_handles;
+};
+
+#define AMDGPU_HW_IP_GFX          0
+#define AMDGPU_HW_IP_COMPUTE      1
+#define AMDGPU_HW_IP_DMA          2
+#define AMDGPU_HW_IP_UVD          3
+#define AMDGPU_HW_IP_VCE          4
+#define AMDGPU_HW_IP_UVD_ENC      5
+#define AMDGPU_HW_IP_VCN_DEC      6
+/*
+ * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
+ * both encoding and decoding jobs.
+ */
+#define AMDGPU_HW_IP_VCN_ENC      7
+#define AMDGPU_HW_IP_VCN_JPEG     8
+#define AMDGPU_HW_IP_VPE          9
+#define AMDGPU_HW_IP_NUM          10
+
+#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
+
+#define AMDGPU_CHUNK_ID_IB		0x01
+#define AMDGPU_CHUNK_ID_FENCE		0x02
+#define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
+#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES	0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT    0x08
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x09
+#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW   0x0a
+
+struct drm_amdgpu_cs_chunk {
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
+};
+
+struct drm_amdgpu_cs_in {
+	/** Rendering context id */
+	__u32		ctx_id;
+	/**  Handle of resource list associated with CS */
+	__u32		bo_list_handle;
+	__u32		num_chunks;
+	__u32		flags;
+	/** This points to __u64 * which points to cs chunks */
+	__u64		chunks;
+};
+
+struct drm_amdgpu_cs_out {
+	__u64 handle;
+};
+
+union drm_amdgpu_cs {
+	struct drm_amdgpu_cs_in in;
+	struct drm_amdgpu_cs_out out;
+};
+
+/* Specify flags to be used for IB */
+
+/* This IB should be submitted to CE */
+#define AMDGPU_IB_FLAG_CE	(1<<0)
+
+/* Preamble flag, which means the IB could be dropped if no context switch */
+#define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
+
+/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
+#define AMDGPU_IB_FLAG_PREEMPT (1<<2)
+
+/* The IB fence should do the L2 writeback but not invalidate any shader
+ * caches (L2/vL1/sL1/I$). */
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+
+/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
+ * This will reset wave ID counters for the IB.
+ */
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+
+/* Flag the IB as secure (TMZ)
+ */
+#define AMDGPU_IB_FLAGS_SECURE  (1 << 5)
+
+/* Tell KMD to flush and invalidate caches
+ */
+#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC  (1 << 6)
+
+struct drm_amdgpu_cs_chunk_ib {
+	__u32 _pad;
+	/** AMDGPU_IB_FLAG_* */
+	__u32 flags;
+	/** Virtual address to begin IB execution */
+	__u64 va_start;
+	/** Size of submission */
+	__u32 ib_bytes;
+	/** HW IP to submit to */
+	__u32 ip_type;
+	/** HW IP index of the same type to submit to  */
+	__u32 ip_instance;
+	/** Ring index to submit to */
+	__u32 ring;
+};
+
+struct drm_amdgpu_cs_chunk_dep {
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
+	__u64 handle;
+};
+
+struct drm_amdgpu_cs_chunk_fence {
+	__u32 handle;
+	__u32 offset;
+};
+
+struct drm_amdgpu_cs_chunk_sem {
+	__u32 handle;
+};
+
+struct drm_amdgpu_cs_chunk_syncobj {
+       __u32 handle;
+       __u32 flags;
+       __u64 point;
+};
+
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2
+
+union drm_amdgpu_fence_to_handle {
+	struct {
+		struct drm_amdgpu_fence fence;
+		__u32 what;
+		__u32 pad;
+	} in;
+	struct {
+		__u32 handle;
+	} out;
+};
+
+struct drm_amdgpu_cs_chunk_data {
+	union {
+		struct drm_amdgpu_cs_chunk_ib		ib_data;
+		struct drm_amdgpu_cs_chunk_fence	fence_data;
+	};
+};
+
+#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW         0x1
+
+struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
+	__u64 shadow_va;
+	__u64 csa_va;
+	__u64 gds_va;
+	__u64 flags;
+};
+
+/*
+ *  Query h/w info: Flag that this is an integrated (a.k.a. fusion) GPU
+ *
+ */
+#define AMDGPU_IDS_FLAGS_FUSION         0x1
+#define AMDGPU_IDS_FLAGS_PREEMPTION     0x2
+#define AMDGPU_IDS_FLAGS_TMZ            0x4
+#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8
+
+/*
+ *  Query h/w info: Flag identifying VF/PF/PT mode
+ *
+ */
+#define AMDGPU_IDS_FLAGS_MODE_MASK      0x300
+#define AMDGPU_IDS_FLAGS_MODE_SHIFT     0x8
+#define AMDGPU_IDS_FLAGS_MODE_PF        0x0
+#define AMDGPU_IDS_FLAGS_MODE_VF        0x1
+#define AMDGPU_IDS_FLAGS_MODE_PT        0x2
+
+/* indicate if acceleration can be working */
+#define AMDGPU_INFO_ACCEL_WORKING		0x00
+/* get the crtc_id from the mode object id? */
+#define AMDGPU_INFO_CRTC_FROM_ID		0x01
+/* query hw IP info */
+#define AMDGPU_INFO_HW_IP_INFO			0x02
+/* query hw IP instance count for the specified type */
+#define AMDGPU_INFO_HW_IP_COUNT			0x03
+/* timestamp for GL_ARB_timer_query */
+#define AMDGPU_INFO_TIMESTAMP			0x05
+/* Query the firmware version */
+#define AMDGPU_INFO_FW_VERSION			0x0e
+	/* Subquery id: Query VCE firmware version */
+	#define AMDGPU_INFO_FW_VCE		0x1
+	/* Subquery id: Query UVD firmware version */
+	#define AMDGPU_INFO_FW_UVD		0x2
+	/* Subquery id: Query GMC firmware version */
+	#define AMDGPU_INFO_FW_GMC		0x03
+	/* Subquery id: Query GFX ME firmware version */
+	#define AMDGPU_INFO_FW_GFX_ME		0x04
+	/* Subquery id: Query GFX PFP firmware version */
+	#define AMDGPU_INFO_FW_GFX_PFP		0x05
+	/* Subquery id: Query GFX CE firmware version */
+	#define AMDGPU_INFO_FW_GFX_CE		0x06
+	/* Subquery id: Query GFX RLC firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC		0x07
+	/* Subquery id: Query GFX MEC firmware version */
+	#define AMDGPU_INFO_FW_GFX_MEC		0x08
+	/* Subquery id: Query SMC firmware version */
+	#define AMDGPU_INFO_FW_SMC		0x0a
+	/* Subquery id: Query SDMA firmware version */
+	#define AMDGPU_INFO_FW_SDMA		0x0b
+	/* Subquery id: Query PSP SOS firmware version */
+	#define AMDGPU_INFO_FW_SOS		0x0c
+	/* Subquery id: Query PSP ASD firmware version */
+	#define AMDGPU_INFO_FW_ASD		0x0d
+	/* Subquery id: Query VCN firmware version */
+	#define AMDGPU_INFO_FW_VCN		0x0e
+	/* Subquery id: Query GFX RLC SRLC firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f
+	/* Subquery id: Query GFX RLC SRLG firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10
+	/* Subquery id: Query GFX RLC SRLS firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
+	/* Subquery id: Query DMCU firmware version */
+	#define AMDGPU_INFO_FW_DMCU		0x12
+	#define AMDGPU_INFO_FW_TA		0x13
+	/* Subquery id: Query DMCUB firmware version */
+	#define AMDGPU_INFO_FW_DMCUB		0x14
+	/* Subquery id: Query TOC firmware version */
+	#define AMDGPU_INFO_FW_TOC		0x15
+	/* Subquery id: Query CAP firmware version */
+	#define AMDGPU_INFO_FW_CAP		0x16
+	/* Subquery id: Query GFX RLCP firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLCP		0x17
+	/* Subquery id: Query GFX RLCV firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLCV		0x18
+	/* Subquery id: Query MES_KIQ firmware version */
+	#define AMDGPU_INFO_FW_MES_KIQ		0x19
+	/* Subquery id: Query MES firmware version */
+	#define AMDGPU_INFO_FW_MES		0x1a
+	/* Subquery id: Query IMU firmware version */
+	#define AMDGPU_INFO_FW_IMU		0x1b
+	/* Subquery id: Query VPE firmware version */
+	#define AMDGPU_INFO_FW_VPE		0x1c
+
+/* number of bytes moved for TTM migration */
+#define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
+/* the used VRAM size */
+#define AMDGPU_INFO_VRAM_USAGE			0x10
+/* the used GTT size */
+#define AMDGPU_INFO_GTT_USAGE			0x11
+/* Information about GDS, etc. resource configuration */
+#define AMDGPU_INFO_GDS_CONFIG			0x13
+/* Query information about VRAM and GTT domains */
+#define AMDGPU_INFO_VRAM_GTT			0x14
+/* Query information about registers in the MMR address space */
+#define AMDGPU_INFO_READ_MMR_REG		0x15
+/* Query information about device: rev id, family, etc. */
+#define AMDGPU_INFO_DEV_INFO			0x16
+/* visible vram usage */
+#define AMDGPU_INFO_VIS_VRAM_USAGE		0x17
+/* number of TTM buffer evictions */
+#define AMDGPU_INFO_NUM_EVICTIONS		0x18
+/* Query memory about VRAM and GTT domains */
+#define AMDGPU_INFO_MEMORY			0x19
+/* Query vce clock table */
+#define AMDGPU_INFO_VCE_CLOCK_TABLE		0x1A
+/* Query vbios related information */
+#define AMDGPU_INFO_VBIOS			0x1B
+	/* Subquery id: Query vbios size */
+	#define AMDGPU_INFO_VBIOS_SIZE		0x1
+	/* Subquery id: Query vbios image */
+	#define AMDGPU_INFO_VBIOS_IMAGE		0x2
+	/* Subquery id: Query vbios info */
+	#define AMDGPU_INFO_VBIOS_INFO		0x3
+/* Query UVD handles */
+#define AMDGPU_INFO_NUM_HANDLES			0x1C
+/* Query sensor related information */
+#define AMDGPU_INFO_SENSOR			0x1D
+	/* Subquery id: Query GPU shader clock */
+	#define AMDGPU_INFO_SENSOR_GFX_SCLK		0x1
+	/* Subquery id: Query GPU memory clock */
+	#define AMDGPU_INFO_SENSOR_GFX_MCLK		0x2
+	/* Subquery id: Query GPU temperature */
+	#define AMDGPU_INFO_SENSOR_GPU_TEMP		0x3
+	/* Subquery id: Query GPU load */
+	#define AMDGPU_INFO_SENSOR_GPU_LOAD		0x4
+	/* Subquery id: Query average GPU power	*/
+	#define AMDGPU_INFO_SENSOR_GPU_AVG_POWER	0x5
+	/* Subquery id: Query northbridge voltage */
+	#define AMDGPU_INFO_SENSOR_VDDNB		0x6
+	/* Subquery id: Query graphics voltage */
+	#define AMDGPU_INFO_SENSOR_VDDGFX		0x7
+	/* Subquery id: Query GPU stable pstate shader clock */
+	#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK		0x8
+	/* Subquery id: Query GPU stable pstate memory clock */
+	#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK		0x9
+	/* Subquery id: Query GPU peak pstate shader clock */
+	#define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK			0xa
+	/* Subquery id: Query GPU peak pstate memory clock */
+	#define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK			0xb
+	/* Subquery id: Query input GPU power	*/
+	#define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER	0xc
+/* Number of VRAM page faults on CPU access. */
+#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS	0x1E
+#define AMDGPU_INFO_VRAM_LOST_COUNTER		0x1F
+/* query ras mask of enabled features*/
+#define AMDGPU_INFO_RAS_ENABLED_FEATURES	0x20
+/* RAS MASK: UMC (VRAM) */
+#define AMDGPU_INFO_RAS_ENABLED_UMC			(1 << 0)
+/* RAS MASK: SDMA */
+#define AMDGPU_INFO_RAS_ENABLED_SDMA			(1 << 1)
+/* RAS MASK: GFX */
+#define AMDGPU_INFO_RAS_ENABLED_GFX			(1 << 2)
+/* RAS MASK: MMHUB */
+#define AMDGPU_INFO_RAS_ENABLED_MMHUB			(1 << 3)
+/* RAS MASK: ATHUB */
+#define AMDGPU_INFO_RAS_ENABLED_ATHUB			(1 << 4)
+/* RAS MASK: PCIE */
+#define AMDGPU_INFO_RAS_ENABLED_PCIE			(1 << 5)
+/* RAS MASK: HDP */
+#define AMDGPU_INFO_RAS_ENABLED_HDP			(1 << 6)
+/* RAS MASK: XGMI */
+#define AMDGPU_INFO_RAS_ENABLED_XGMI			(1 << 7)
+/* RAS MASK: DF */
+#define AMDGPU_INFO_RAS_ENABLED_DF			(1 << 8)
+/* RAS MASK: SMN */
+#define AMDGPU_INFO_RAS_ENABLED_SMN			(1 << 9)
+/* RAS MASK: SEM */
+#define AMDGPU_INFO_RAS_ENABLED_SEM			(1 << 10)
+/* RAS MASK: MP0 */
+#define AMDGPU_INFO_RAS_ENABLED_MP0			(1 << 11)
+/* RAS MASK: MP1 */
+#define AMDGPU_INFO_RAS_ENABLED_MP1			(1 << 12)
+/* RAS MASK: FUSE */
+#define AMDGPU_INFO_RAS_ENABLED_FUSE			(1 << 13)
+/* query video encode/decode caps */
+#define AMDGPU_INFO_VIDEO_CAPS			0x21
+	/* Subquery id: Decode */
+	#define AMDGPU_INFO_VIDEO_CAPS_DECODE		0
+	/* Subquery id: Encode */
+	#define AMDGPU_INFO_VIDEO_CAPS_ENCODE		1
+/* Query the max number of IBs per gang per submission */
+#define AMDGPU_INFO_MAX_IBS			0x22
+/* query last page fault info */
+#define AMDGPU_INFO_GPUVM_FAULT			0x23
+/* query FW object size and alignment */
+#define AMDGPU_INFO_UQ_FW_AREAS			0x24
+
+/* Hybrid Stack Specific Defs*/
+/* gpu capability */
+#define AMDGPU_INFO_CAPABILITY			0x50
+/* virtual range */
+#define AMDGPU_INFO_VIRTUAL_RANGE		0x51
+/* query pin memory capability */
+#define AMDGPU_CAPABILITY_PIN_MEM_FLAG  (1 << 0)
+/* query direct gma capability */
+#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG	(1 << 1)
+
+#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
+#define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
+#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT	8
+#define AMDGPU_INFO_MMR_SH_INDEX_MASK	0xff
+
+struct drm_amdgpu_query_fw {
+	/** AMDGPU_INFO_FW_* */
+	__u32 fw_type;
+	/**
+	 * Index of the IP if there are more IPs of
+	 * the same type.
+	 */
+	__u32 ip_instance;
+	/**
+	 * Index of the engine. Whether this is used depends
+	 * on the firmware type. (e.g. MEC, SDMA)
+	 */
+	__u32 index;
+	__u32 _pad;
+};
+
+/* Input structure for the INFO ioctl */
+struct drm_amdgpu_info {
+	/* Where the return value will be stored */
+	__u64 return_pointer;
+	/* The size of the return value. Just like "size" in "snprintf",
+	 * it limits how many bytes the kernel can write. */
+	__u32 return_size;
+	/* The query request id (one of AMDGPU_INFO_*). */
+	__u32 query;
+
+	union {	/* query arguments; all members overlay the same storage */
+		struct {
+			__u32 id;
+			__u32 _pad;
+		} mode_crtc;
+
+		struct {
+			/** AMDGPU_HW_IP_* */
+			__u32 type;
+			/**
+			 * Index of the IP if there are more IPs of the same
+			 * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
+			 */
+			__u32 ip_instance;
+		} query_hw_ip;
+
+		struct {
+			__u32 dword_offset;
+			/** number of registers to read */
+			__u32 count;
+			__u32 instance;
+			/** For future use, no flags defined so far */
+			__u32 flags;
+		} read_mmr_reg;
+
+		struct {
+			__u32 aperture;	/* presumably AMDGPU_SUA_APERTURE_* - TODO confirm */
+			__u32 _pad;
+		} virtual_range;
+
+		struct drm_amdgpu_query_fw query_fw;
+
+		struct {
+			__u32 type;	/* presumably AMDGPU_INFO_VBIOS_* - TODO confirm */
+			__u32 offset;
+		} vbios_info;
+
+		struct {
+			__u32 type;	/* presumably AMDGPU_INFO_SENSOR_* - TODO confirm */
+		} sensor_info;
+
+		struct {
+			__u32 type;	/* presumably AMDGPU_INFO_VIDEO_CAPS_* - TODO confirm */
+		} video_cap;
+	};
+};
+
+struct drm_amdgpu_info_gds {
+	/** GDS GFX partition size */
+	__u32 gds_gfx_partition_size;
+	/** GDS compute partition size */
+	__u32 compute_partition_size;
+	/** total GDS memory size */
+	__u32 gds_total_size;
+	/** GWS size per GFX partition */
+	__u32 gws_per_gfx_partition;
+	/** GWS size per compute partition */
+	__u32 gws_per_compute_partition;
+	/** OA size per GFX partition */
+	__u32 oa_per_gfx_partition;
+	/** OA size per compute partition */
+	__u32 oa_per_compute_partition;
+	__u32 _pad;
+};
+
+struct drm_amdgpu_info_vram_gtt {
+	__u64 vram_size;
+	__u64 vram_cpu_accessible_size;
+	__u64 gtt_size;
+};
+
+struct drm_amdgpu_heap_info {
+	/** max. physical memory */
+	__u64 total_heap_size;
+
+	/** Theoretical max. available memory in the given heap */
+	__u64 usable_heap_size;
+
+	/**
+	 * Number of bytes allocated in the heap. This includes all processes
+	 * and private allocations in the kernel. It changes when new buffers
+	 * are allocated, freed, and moved. It cannot be larger than
+	 * heap_size.
+	 */
+	__u64 heap_usage;
+
+	/**
+	 * Theoretical possible max. size of buffer which
+	 * could be allocated in the given heap
+	 */
+	__u64 max_allocation;
+};
+
+struct drm_amdgpu_memory_info {
+	struct drm_amdgpu_heap_info vram;
+	struct drm_amdgpu_heap_info cpu_accessible_vram;
+	struct drm_amdgpu_heap_info gtt;
+};
+
+struct drm_amdgpu_info_firmware {
+	__u32 ver;
+	__u32 feature;
+};
+
+struct drm_amdgpu_info_vbios {
+	__u8 name[64];
+	__u8 vbios_pn[64];
+	__u32 version;
+	__u32 pad;
+	__u8 vbios_ver_str[32];
+	__u8 date[32];
+};
+
+#define AMDGPU_VRAM_TYPE_UNKNOWN 0
+#define AMDGPU_VRAM_TYPE_GDDR1 1
+#define AMDGPU_VRAM_TYPE_DDR2  2
+#define AMDGPU_VRAM_TYPE_GDDR3 3
+#define AMDGPU_VRAM_TYPE_GDDR4 4
+#define AMDGPU_VRAM_TYPE_GDDR5 5
+#define AMDGPU_VRAM_TYPE_HBM   6
+#define AMDGPU_VRAM_TYPE_DDR3  7
+#define AMDGPU_VRAM_TYPE_DDR4  8
+#define AMDGPU_VRAM_TYPE_GDDR6 9
+#define AMDGPU_VRAM_TYPE_DDR5  10
+#define AMDGPU_VRAM_TYPE_LPDDR4 11
+#define AMDGPU_VRAM_TYPE_LPDDR5 12
+#define AMDGPU_VRAM_TYPE_HBM3E 13
+
+#define AMDGPU_VRAM_TYPE_HBM_WIDTH 4096
+
+struct drm_amdgpu_info_device {
+	/** PCI Device ID */
+	__u32 device_id;
+	/** Internal chip revision (A0, A1, etc.) */
+	__u32 chip_rev;
+	__u32 external_rev;
+	/** Revision id in PCI Config space */
+	__u32 pci_rev;
+	__u32 family;
+	__u32 num_shader_engines;
+	__u32 num_shader_arrays_per_engine;
+	/* in KHz */
+	__u32 gpu_counter_freq;
+	__u64 max_engine_clock;
+	__u64 max_memory_clock;
+	/* cu information */
+	__u32 cu_active_number;
+	/* NOTE: cu_ao_mask is INVALID, DON'T use it */
+	__u32 cu_ao_mask;
+	__u32 cu_bitmap[4][4];
+	/** Render backend pipe mask. One render backend is CB+DB. */
+	__u32 enabled_rb_pipes_mask;
+	__u32 num_rb_pipes;
+	__u32 num_hw_gfx_contexts;
+	/* PCIe version (the smaller of the GPU and the CPU/motherboard) */
+	__u32 pcie_gen;
+	__u64 ids_flags;
+	/** Starting virtual address for UMDs. */
+	__u64 virtual_address_offset;
+	/** The maximum virtual address */
+	__u64 virtual_address_max;
+	/** Required alignment of virtual addresses. */
+	__u32 virtual_address_alignment;
+	/** Page table entry - fragment size */
+	__u32 pte_fragment_size;
+	__u32 gart_page_size;
+	/** constant engine ram size*/
+	__u32 ce_ram_size;
+	/** video memory type info*/
+	__u32 vram_type;
+	/** video memory bit width*/
+	__u32 vram_bit_width;
+	/* vce harvesting instance */
+	__u32 vce_harvest_config;
+	/* gfx double offchip LDS buffers */
+	__u32 gc_double_offchip_lds_buf;
+	/* NGG Primitive Buffer */
+	__u64 prim_buf_gpu_addr;
+	/* NGG Position Buffer */
+	__u64 pos_buf_gpu_addr;
+	/* NGG Control Sideband */
+	__u64 cntl_sb_buf_gpu_addr;
+	/* NGG Parameter Cache */
+	__u64 param_buf_gpu_addr;
+	__u32 prim_buf_size;
+	__u32 pos_buf_size;
+	__u32 cntl_sb_buf_size;
+	__u32 param_buf_size;
+	/* wavefront size*/
+	__u32 wave_front_size;
+	/* shader visible vgprs*/
+	__u32 num_shader_visible_vgprs;
+	/* CU per shader array*/
+	__u32 num_cu_per_sh;
+	/* number of tcc blocks*/
+	__u32 num_tcc_blocks;
+	/* gs vgt table depth*/
+	__u32 gs_vgt_table_depth;
+	/* gs primitive buffer depth*/
+	__u32 gs_prim_buffer_depth;
+	/* max gs wavefront per vgt*/
+	__u32 max_gs_waves_per_vgt;
+	/* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */
+	__u32 pcie_num_lanes;
+	/* always on cu bitmap */
+	__u32 cu_ao_bitmap[4][4];
+	/** Starting high virtual address for UMDs. */
+	__u64 high_va_offset;
+	/** The maximum high virtual address */
+	__u64 high_va_max;
+	/* gfx10 pa_sc_tile_steering_override */
+	__u32 pa_sc_tile_steering_override;
+	/* disabled TCCs */
+	__u64 tcc_disabled_mask;
+	__u64 min_engine_clock;
+	__u64 min_memory_clock;
+	/* The following fields are only set on gfx11+, older chips set 0. */
+	__u32 tcp_cache_size;       /* AKA GL0, VMEM cache */
+	__u32 num_sqc_per_wgp;
+	__u32 sqc_data_cache_size;  /* AKA SMEM cache */
+	__u32 sqc_inst_cache_size;
+	__u32 gl1c_cache_size;
+	__u32 gl2c_cache_size;
+	__u64 mall_size;            /* AKA infinity cache */
+	/* high 32 bits of the rb pipes mask */
+	__u32 enabled_rb_pipes_mask_hi;
+	/* shadow area size for gfx11 */
+	__u32 shadow_size;
+	/* shadow area base virtual alignment for gfx11 */
+	__u32 shadow_alignment;
+	/* context save area size for gfx11 */
+	__u32 csa_size;
+	/* context save area base virtual alignment for gfx11 */
+	__u32 csa_alignment;
+	/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
+	__u32 userq_ip_mask;
+	__u32 pad;
+};
+
+struct drm_amdgpu_info_hw_ip {
+	/** Version of h/w IP */
+	__u32  hw_ip_version_major;
+	__u32  hw_ip_version_minor;
+	/** Capabilities */
+	__u64  capabilities_flags;
+	/** command buffer address start alignment*/
+	__u32  ib_start_alignment;
+	/** command buffer size alignment*/
+	__u32  ib_size_alignment;
+	/** Bitmask of available rings. Bit 0 means ring 0, etc. */
+	__u32  available_rings;
+	/** version info: bits 23:16 major, 15:8 minor, 7:0 revision */
+	__u32  ip_discovery_version;
+};
+
+/* GFX metadata BO sizes and alignment info (in bytes) */
+struct drm_amdgpu_info_uq_fw_areas_gfx {
+	/* shadow area size */
+	__u32 shadow_size;
+	/* shadow area base virtual mem alignment */
+	__u32 shadow_alignment;
+	/* context save area size */
+	__u32 csa_size;
+	/* context save area base virtual mem alignment */
+	__u32 csa_alignment;
+};
+
+/* IP specific fw related information used in the
+ * subquery AMDGPU_INFO_UQ_FW_AREAS
+ */
+struct drm_amdgpu_info_uq_fw_areas {
+	union {
+		struct drm_amdgpu_info_uq_fw_areas_gfx gfx;
+	};
+};
+
+struct drm_amdgpu_info_num_handles {
+	/** Max handles as supported by firmware for UVD */
+	__u32  uvd_max_handles;
+	/** Handles currently in use for UVD */
+	__u32  uvd_used_handles;
+};
+
+#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES		6
+
+struct drm_amdgpu_info_vce_clock_table_entry {
+	/** System clock */
+	__u32 sclk;
+	/** Memory clock */
+	__u32 mclk;
+	/** VCE clock */
+	__u32 eclk;
+	__u32 pad;
+};
+
+struct drm_amdgpu_info_vce_clock_table {
+	struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES];
+	__u32 num_valid_entries;
+	__u32 pad;
+};
+
+/* query video encode/decode caps */
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2			0
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4			1
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1			2
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC		3
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC			4
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG			5
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9			6
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1			7
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT			8
+
+struct drm_amdgpu_info_video_codec_info {
+	__u32 valid;
+	__u32 max_width;
+	__u32 max_height;
+	__u32 max_pixels_per_frame;
+	__u32 max_level;
+	__u32 pad;
+};
+
+struct drm_amdgpu_info_video_caps {
+	struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
+};
+
+#define AMDGPU_VMHUB_TYPE_MASK			0xff
+#define AMDGPU_VMHUB_TYPE_SHIFT			0
+#define AMDGPU_VMHUB_TYPE_GFX			0
+#define AMDGPU_VMHUB_TYPE_MM0			1
+#define AMDGPU_VMHUB_TYPE_MM1			2
+#define AMDGPU_VMHUB_IDX_MASK			0xff00
+#define AMDGPU_VMHUB_IDX_SHIFT			8
+
+struct drm_amdgpu_info_gpuvm_fault {
+	__u64 addr;
+	__u32 status;
+	__u32 vmhub;
+};
+
+struct drm_amdgpu_info_uq_metadata_gfx {
+	/* shadow area size for gfx11 */
+	__u32 shadow_size;
+	/* shadow area base virtual alignment for gfx11 */
+	__u32 shadow_alignment;
+	/* context save area size for gfx11 */
+	__u32 csa_size;
+	/* context save area base virtual alignment for gfx11 */
+	__u32 csa_alignment;
+};
+
+struct drm_amdgpu_info_uq_metadata {
+	union {
+		struct drm_amdgpu_info_uq_metadata_gfx gfx;
+	};
+};
+
+/*
+ * Supported GPU families
+ */
+#define AMDGPU_FAMILY_UNKNOWN			0
+#define AMDGPU_FAMILY_SI			110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */
+#define AMDGPU_FAMILY_CI			120 /* Bonaire, Hawaii */
+#define AMDGPU_FAMILY_KV			125 /* Kaveri, Kabini, Mullins */
+#define AMDGPU_FAMILY_VI			130 /* Iceland, Tonga */
+#define AMDGPU_FAMILY_CZ			135 /* Carrizo, Stoney */
+#define AMDGPU_FAMILY_AI			141 /* Vega10 */
+#define AMDGPU_FAMILY_RV			142 /* Raven */
+#define AMDGPU_FAMILY_NV			143 /* Navi10 */
+#define AMDGPU_FAMILY_VGH			144 /* Van Gogh */
+#define AMDGPU_FAMILY_GC_11_0_0			145 /* GC 11.0.0 */
+#define AMDGPU_FAMILY_YC			146 /* Yellow Carp */
+#define AMDGPU_FAMILY_GC_11_0_1			148 /* GC 11.0.1 */
+#define AMDGPU_FAMILY_GC_10_3_6			149 /* GC 10.3.6 */
+#define AMDGPU_FAMILY_GC_10_3_7			151 /* GC 10.3.7 */
+#define AMDGPU_FAMILY_GC_11_5_0			150 /* GC 11.5.0 */
+#define AMDGPU_FAMILY_GC_12_0_0			152 /* GC 12.0.0 */
+
+#ifndef HAVE_DRM_COLOR_CTM_3X4
+/* FIXME wrong namespace! */
+struct drm_color_ctm_3x4 {
+	/*
+	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[12];
+};
+#endif
+
+/**
+ *  Definition of System Unified Address (SUA) apertures
+ */
+#define AMDGPU_SUA_APERTURE_PRIVATE    1
+#define AMDGPU_SUA_APERTURE_SHARED     2
+struct drm_amdgpu_virtual_range {	/* NOTE(review): presumably the AMDGPU_INFO_VIRTUAL_RANGE result - confirm */
+	__u64 start;
+	__u64 end;
+};
+
+struct drm_amdgpu_capability {
+	__u32 flag;
+	__u32 direct_gma_size;
+};
+
+/*
+ * Definition of free sync enter and exit signals
+ * We may have more options in the future
+ */
+#define AMDGPU_FREESYNC_FULLSCREEN_ENTER                1
+#define AMDGPU_FREESYNC_FULLSCREEN_EXIT                 2
+
+struct drm_amdgpu_freesync {
+        __u32 op;                       /* AMDGPU_FREESYNC_FULLSCREEN_ENTER or */
+                                        /* AMDGPU_FREESYNC_FULLSCREEN_EXIT */
+        __u32 spare[7];
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/test/mockgpu/amd/amddriver.py b/test/mockgpu/amd/amddriver.py
index 29038a097a..0e498f1d05 100644
--- a/test/mockgpu/amd/amddriver.py
+++ b/test/mockgpu/amd/amddriver.py
@@ -1,6 +1,7 @@
 import pathlib, re, ctypes, mmap, collections, functools, copy, os
 import tinygrad.runtime.autogen.kfd as kfd
 import tinygrad.runtime.autogen.am.am as am
+import tinygrad.runtime.autogen.amdgpu_drm as amdgpu_drm
 from tinygrad.helpers import from_mv
 from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
 from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props
@@ -33,6 +34,16 @@ class DRMFileDesc(VirtFileDesc):
     super().__init__(fd)
     self.driver, self.gpu = driver, gpu
 
+  def ioctl(self, fd, request, argp):  # NOTE(review): fd and request are unused; argp is always read as struct_drm_amdgpu_info
+    struct = amdgpu_drm.struct_drm_amdgpu_info.from_address(argp)
+    if struct.query == amdgpu_drm.AMDGPU_INFO_DEV_INFO:
+      dev_info = amdgpu_drm.struct_drm_amdgpu_info_device.from_address(struct.return_pointer)  # NOTE(review): return_size is not checked
+      # mock of gfx1100: the first 12 of the 16 cu_bitmap[se][sa] entries are set to 0xff, the remaining 4 to 0
+      for se in range(4):
+        for sa in range(4): dev_info.cu_bitmap[se][sa] = 0xff if (se * 4 + sa) < 12 else 0
+      return 0  # only cu_bitmap is written; no other dev_info field is assigned here
+    raise NotImplementedError(f"unknown DRM ioctl query {struct.query}")  # any query other than AMDGPU_INFO_DEV_INFO ends up here
+
   def mmap(self, start, sz, prot, flags, fd, offset): return libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
 
 class AMDDriver(VirtDriver):
diff --git a/tinygrad/runtime/autogen/__init__.py b/tinygrad/runtime/autogen/__init__.py
index 9ad96bb33f..5c5174b335 100644
--- a/tinygrad/runtime/autogen/__init__.py
+++ b/tinygrad/runtime/autogen/__init__.py
@@ -105,6 +105,7 @@ def __getattr__(nm):
     case "amd_gpu": return load("amd_gpu", None, [root/f"extra/hip_gpu_driver/{s}.h" for s in ["sdma_registers", "nvd", "gc_11_0_0_offset",
                                                                                                "sienna_cichlid_ip_offset"]],
                                 args=["-I/opt/rocm/include", "-x", "c++"])
+    case "amdgpu_drm": return load("amdgpu_drm", None, [ "/usr/include/drm/drm.h", *[root/f"extra/hip_gpu_driver/{s}.h" for s in ["amdgpu_drm"]]])
     case "kgsl": return load("kgsl", None, [root/"extra/qcom_gpu_driver/msm_kgsl.h"], args=["-D__user="])
     case "qcom_dsp":
       return load("qcom_dsp", None, [root/f"extra/dsp/include/{s}.h" for s in ["ion", "msm_ion", "adsprpc_shared", "remote_default", "apps_std"]])
diff --git a/tinygrad/runtime/autogen/amdgpu_drm.py b/tinygrad/runtime/autogen/amdgpu_drm.py
new file mode 100644
index 0000000000..6e967ed763
--- /dev/null
+++ b/tinygrad/runtime/autogen/amdgpu_drm.py
@@ -0,0 +1,1593 @@
+# mypy: disable-error-code="empty-body"
+from __future__ import annotations
+import ctypes
+from typing import Annotated, Literal, TypeAlias
+from tinygrad.runtime.support.c import _IO, _IOW, _IOR, _IOWR
+from tinygrad.runtime.support import c
+drm_handle_t: TypeAlias = Annotated[int, ctypes.c_uint32]
+drm_context_t: TypeAlias = Annotated[int, ctypes.c_uint32]
+drm_drawable_t: TypeAlias = Annotated[int, ctypes.c_uint32]
+drm_magic_t: TypeAlias = Annotated[int, ctypes.c_uint32]
+@c.record
+class struct_drm_clip_rect(c.Struct):
+  SIZE = 8
+  x1: Annotated[Annotated[int, ctypes.c_uint16], 0]
+  y1: Annotated[Annotated[int, ctypes.c_uint16], 2]
+  x2: Annotated[Annotated[int, ctypes.c_uint16], 4]
+  y2: Annotated[Annotated[int, ctypes.c_uint16], 6]
+@c.record
+class struct_drm_drawable_info(c.Struct):
+  SIZE = 16
+  num_rects: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  rects: Annotated[c.POINTER[struct_drm_clip_rect], 8]
+@c.record
+class struct_drm_tex_region(c.Struct):
+  SIZE = 8
+  next: Annotated[Annotated[int, ctypes.c_ubyte], 0]
+  prev: Annotated[Annotated[int, ctypes.c_ubyte], 1]
+  in_use: Annotated[Annotated[int, ctypes.c_ubyte], 2]
+  padding: Annotated[Annotated[int, ctypes.c_ubyte], 3]
+  age: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_hw_lock(c.Struct):
+  SIZE = 64
+  lock: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  padding: Annotated[c.Array[Annotated[bytes, ctypes.c_char], Literal[60]], 4]
+@c.record
+class struct_drm_version(c.Struct):
+  SIZE = 64
+  version_major: Annotated[Annotated[int, ctypes.c_int32], 0]
+  version_minor: Annotated[Annotated[int, ctypes.c_int32], 4]
+  version_patchlevel: Annotated[Annotated[int, ctypes.c_int32], 8]
+  name_len: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  name: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 24]
+  date_len: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  date: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 40]
+  desc_len: Annotated[Annotated[int, ctypes.c_uint64], 48]
+  desc: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 56]
+__kernel_size_t: TypeAlias = Annotated[int, ctypes.c_uint64]
+@c.record
+class struct_drm_unique(c.Struct):
+  SIZE = 16
+  unique_len: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  unique: Annotated[c.POINTER[Annotated[bytes, ctypes.c_char]], 8]
+@c.record
+class struct_drm_list(c.Struct):
+  SIZE = 16
+  count: Annotated[Annotated[int, ctypes.c_int32], 0]
+  version: Annotated[c.POINTER[struct_drm_version], 8]
+@c.record
+class struct_drm_block(c.Struct):
+  SIZE = 4
+  unused: Annotated[Annotated[int, ctypes.c_int32], 0]
+@c.record
+class struct_drm_control(c.Struct):
+  SIZE = 8
+  func: Annotated[struct_drm_control_func, 0]
+  irq: Annotated[Annotated[int, ctypes.c_int32], 4]
+class struct_drm_control_func(Annotated[int, ctypes.c_uint32], c.Enum): pass
+DRM_ADD_COMMAND = struct_drm_control_func.define('DRM_ADD_COMMAND', 0)
+DRM_RM_COMMAND = struct_drm_control_func.define('DRM_RM_COMMAND', 1)
+DRM_INST_HANDLER = struct_drm_control_func.define('DRM_INST_HANDLER', 2)
+DRM_UNINST_HANDLER = struct_drm_control_func.define('DRM_UNINST_HANDLER', 3)
+
+class enum_drm_map_type(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_FRAME_BUFFER = enum_drm_map_type.define('_DRM_FRAME_BUFFER', 0)
+_DRM_REGISTERS = enum_drm_map_type.define('_DRM_REGISTERS', 1)
+_DRM_SHM = enum_drm_map_type.define('_DRM_SHM', 2)
+_DRM_AGP = enum_drm_map_type.define('_DRM_AGP', 3)
+_DRM_SCATTER_GATHER = enum_drm_map_type.define('_DRM_SCATTER_GATHER', 4)
+_DRM_CONSISTENT = enum_drm_map_type.define('_DRM_CONSISTENT', 5)
+
+class enum_drm_map_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_RESTRICTED = enum_drm_map_flags.define('_DRM_RESTRICTED', 1)
+_DRM_READ_ONLY = enum_drm_map_flags.define('_DRM_READ_ONLY', 2)
+_DRM_LOCKED = enum_drm_map_flags.define('_DRM_LOCKED', 4)
+_DRM_KERNEL = enum_drm_map_flags.define('_DRM_KERNEL', 8)
+_DRM_WRITE_COMBINING = enum_drm_map_flags.define('_DRM_WRITE_COMBINING', 16)
+_DRM_CONTAINS_LOCK = enum_drm_map_flags.define('_DRM_CONTAINS_LOCK', 32)
+_DRM_REMOVABLE = enum_drm_map_flags.define('_DRM_REMOVABLE', 64)
+_DRM_DRIVER = enum_drm_map_flags.define('_DRM_DRIVER', 128)
+
+@c.record
+class struct_drm_ctx_priv_map(c.Struct):
+  SIZE = 16
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  handle: Annotated[ctypes.c_void_p, 8]
+@c.record
+class struct_drm_map(c.Struct):
+  SIZE = 40
+  offset: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  size: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  type: Annotated[enum_drm_map_type, 16]
+  flags: Annotated[enum_drm_map_flags, 20]
+  handle: Annotated[ctypes.c_void_p, 24]
+  mtrr: Annotated[Annotated[int, ctypes.c_int32], 32]
+@c.record
+class struct_drm_client(c.Struct):
+  SIZE = 40
+  idx: Annotated[Annotated[int, ctypes.c_int32], 0]
+  auth: Annotated[Annotated[int, ctypes.c_int32], 4]
+  pid: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  uid: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  magic: Annotated[Annotated[int, ctypes.c_uint64], 24]
+  iocs: Annotated[Annotated[int, ctypes.c_uint64], 32]
+class enum_drm_stat_type(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_STAT_LOCK = enum_drm_stat_type.define('_DRM_STAT_LOCK', 0)
+_DRM_STAT_OPENS = enum_drm_stat_type.define('_DRM_STAT_OPENS', 1)
+_DRM_STAT_CLOSES = enum_drm_stat_type.define('_DRM_STAT_CLOSES', 2)
+_DRM_STAT_IOCTLS = enum_drm_stat_type.define('_DRM_STAT_IOCTLS', 3)
+_DRM_STAT_LOCKS = enum_drm_stat_type.define('_DRM_STAT_LOCKS', 4)
+_DRM_STAT_UNLOCKS = enum_drm_stat_type.define('_DRM_STAT_UNLOCKS', 5)
+_DRM_STAT_VALUE = enum_drm_stat_type.define('_DRM_STAT_VALUE', 6)
+_DRM_STAT_BYTE = enum_drm_stat_type.define('_DRM_STAT_BYTE', 7)
+_DRM_STAT_COUNT = enum_drm_stat_type.define('_DRM_STAT_COUNT', 8)
+_DRM_STAT_IRQ = enum_drm_stat_type.define('_DRM_STAT_IRQ', 9)
+_DRM_STAT_PRIMARY = enum_drm_stat_type.define('_DRM_STAT_PRIMARY', 10)
+_DRM_STAT_SECONDARY = enum_drm_stat_type.define('_DRM_STAT_SECONDARY', 11)
+_DRM_STAT_DMA = enum_drm_stat_type.define('_DRM_STAT_DMA', 12)
+_DRM_STAT_SPECIAL = enum_drm_stat_type.define('_DRM_STAT_SPECIAL', 13)
+_DRM_STAT_MISSED = enum_drm_stat_type.define('_DRM_STAT_MISSED', 14)
+
+@c.record
+class struct_drm_stats(c.Struct):
+  SIZE = 248
+  count: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  data: Annotated[c.Array[struct_drm_stats_data, Literal[15]], 8]
+@c.record
+class struct_drm_stats_data(c.Struct):
+  SIZE = 16
+  value: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  type: Annotated[enum_drm_stat_type, 8]
+class enum_drm_lock_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_LOCK_READY = enum_drm_lock_flags.define('_DRM_LOCK_READY', 1)
+_DRM_LOCK_QUIESCENT = enum_drm_lock_flags.define('_DRM_LOCK_QUIESCENT', 2)
+_DRM_LOCK_FLUSH = enum_drm_lock_flags.define('_DRM_LOCK_FLUSH', 4)
+_DRM_LOCK_FLUSH_ALL = enum_drm_lock_flags.define('_DRM_LOCK_FLUSH_ALL', 8)
+_DRM_HALT_ALL_QUEUES = enum_drm_lock_flags.define('_DRM_HALT_ALL_QUEUES', 16)
+_DRM_HALT_CUR_QUEUES = enum_drm_lock_flags.define('_DRM_HALT_CUR_QUEUES', 32)
+
+@c.record
+class struct_drm_lock(c.Struct):
+  SIZE = 8
+  context: Annotated[Annotated[int, ctypes.c_int32], 0]
+  flags: Annotated[enum_drm_lock_flags, 4]
+class enum_drm_dma_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_DMA_BLOCK = enum_drm_dma_flags.define('_DRM_DMA_BLOCK', 1)
+_DRM_DMA_WHILE_LOCKED = enum_drm_dma_flags.define('_DRM_DMA_WHILE_LOCKED', 2)
+_DRM_DMA_PRIORITY = enum_drm_dma_flags.define('_DRM_DMA_PRIORITY', 4)
+_DRM_DMA_WAIT = enum_drm_dma_flags.define('_DRM_DMA_WAIT', 16)
+_DRM_DMA_SMALLER_OK = enum_drm_dma_flags.define('_DRM_DMA_SMALLER_OK', 32)
+_DRM_DMA_LARGER_OK = enum_drm_dma_flags.define('_DRM_DMA_LARGER_OK', 64)
+
+@c.record
+class struct_drm_buf_desc(c.Struct):
+  SIZE = 32
+  count: Annotated[Annotated[int, ctypes.c_int32], 0]
+  size: Annotated[Annotated[int, ctypes.c_int32], 4]
+  low_mark: Annotated[Annotated[int, ctypes.c_int32], 8]
+  high_mark: Annotated[Annotated[int, ctypes.c_int32], 12]
+  flags: Annotated[struct_drm_buf_desc_flags, 16]
+  agp_start: Annotated[Annotated[int, ctypes.c_uint64], 24]
+class struct_drm_buf_desc_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_PAGE_ALIGN = struct_drm_buf_desc_flags.define('_DRM_PAGE_ALIGN', 1)
+_DRM_AGP_BUFFER = struct_drm_buf_desc_flags.define('_DRM_AGP_BUFFER', 2)
+_DRM_SG_BUFFER = struct_drm_buf_desc_flags.define('_DRM_SG_BUFFER', 4)
+_DRM_FB_BUFFER = struct_drm_buf_desc_flags.define('_DRM_FB_BUFFER', 8)
+_DRM_PCI_BUFFER_RO = struct_drm_buf_desc_flags.define('_DRM_PCI_BUFFER_RO', 16)
+
+@c.record
+class struct_drm_buf_info(c.Struct):
+  SIZE = 16
+  count: Annotated[Annotated[int, ctypes.c_int32], 0]
+  list: Annotated[c.POINTER[struct_drm_buf_desc], 8]
+@c.record
+class struct_drm_buf_free(c.Struct):
+  SIZE = 16
+  count: Annotated[Annotated[int, ctypes.c_int32], 0]
+  list: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 8]
+@c.record
+class struct_drm_buf_pub(c.Struct):
+  SIZE = 24
+  idx: Annotated[Annotated[int, ctypes.c_int32], 0]
+  total: Annotated[Annotated[int, ctypes.c_int32], 4]
+  used: Annotated[Annotated[int, ctypes.c_int32], 8]
+  address: Annotated[ctypes.c_void_p, 16]
+@c.record
+class struct_drm_buf_map(c.Struct):
+  SIZE = 24
+  count: Annotated[Annotated[int, ctypes.c_int32], 0]
+  virtual: Annotated[ctypes.c_void_p, 8]
+  list: Annotated[c.POINTER[struct_drm_buf_pub], 16]
+@c.record
+class struct_drm_dma(c.Struct):
+  SIZE = 64
+  context: Annotated[Annotated[int, ctypes.c_int32], 0]
+  send_count: Annotated[Annotated[int, ctypes.c_int32], 4]
+  send_indices: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 8]
+  send_sizes: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 16]
+  flags: Annotated[enum_drm_dma_flags, 24]
+  request_count: Annotated[Annotated[int, ctypes.c_int32], 28]
+  request_size: Annotated[Annotated[int, ctypes.c_int32], 32]
+  request_indices: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 40]
+  request_sizes: Annotated[c.POINTER[Annotated[int, ctypes.c_int32]], 48]
+  granted_count: Annotated[Annotated[int, ctypes.c_int32], 56]
+class enum_drm_ctx_flags(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_CONTEXT_PRESERVED = enum_drm_ctx_flags.define('_DRM_CONTEXT_PRESERVED', 1)
+_DRM_CONTEXT_2DONLY = enum_drm_ctx_flags.define('_DRM_CONTEXT_2DONLY', 2)
+
+@c.record
+class struct_drm_ctx(c.Struct):
+  SIZE = 8
+  handle: Annotated[drm_context_t, 0]
+  flags: Annotated[enum_drm_ctx_flags, 4]
+@c.record
+class struct_drm_ctx_res(c.Struct):
+  SIZE = 16
+  count: Annotated[Annotated[int, ctypes.c_int32], 0]
+  contexts: Annotated[c.POINTER[struct_drm_ctx], 8]
+@c.record
+class struct_drm_draw(c.Struct):
+  SIZE = 4
+  handle: Annotated[drm_drawable_t, 0]
+class drm_drawable_info_type_t(Annotated[int, ctypes.c_uint32], c.Enum): pass
+DRM_DRAWABLE_CLIPRECTS = drm_drawable_info_type_t.define('DRM_DRAWABLE_CLIPRECTS', 0)
+
+@c.record
+class struct_drm_update_draw(c.Struct):
+  SIZE = 24
+  handle: Annotated[drm_drawable_t, 0]
+  type: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  num: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  data: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_auth(c.Struct):
+  SIZE = 4
+  magic: Annotated[drm_magic_t, 0]
+@c.record
+class struct_drm_irq_busid(c.Struct):
+  SIZE = 16
+  irq: Annotated[Annotated[int, ctypes.c_int32], 0]
+  busnum: Annotated[Annotated[int, ctypes.c_int32], 4]
+  devnum: Annotated[Annotated[int, ctypes.c_int32], 8]
+  funcnum: Annotated[Annotated[int, ctypes.c_int32], 12]
+class enum_drm_vblank_seq_type(Annotated[int, ctypes.c_uint32], c.Enum): pass
+_DRM_VBLANK_ABSOLUTE = enum_drm_vblank_seq_type.define('_DRM_VBLANK_ABSOLUTE', 0)
+_DRM_VBLANK_RELATIVE = enum_drm_vblank_seq_type.define('_DRM_VBLANK_RELATIVE', 1)
+_DRM_VBLANK_HIGH_CRTC_MASK = enum_drm_vblank_seq_type.define('_DRM_VBLANK_HIGH_CRTC_MASK', 62)
+_DRM_VBLANK_EVENT = enum_drm_vblank_seq_type.define('_DRM_VBLANK_EVENT', 67108864)
+_DRM_VBLANK_FLIP = enum_drm_vblank_seq_type.define('_DRM_VBLANK_FLIP', 134217728)
+_DRM_VBLANK_NEXTONMISS = enum_drm_vblank_seq_type.define('_DRM_VBLANK_NEXTONMISS', 268435456)
+_DRM_VBLANK_SECONDARY = enum_drm_vblank_seq_type.define('_DRM_VBLANK_SECONDARY', 536870912)
+_DRM_VBLANK_SIGNAL = enum_drm_vblank_seq_type.define('_DRM_VBLANK_SIGNAL', 1073741824)
+
+@c.record
+class struct_drm_wait_vblank_request(c.Struct):
+  SIZE = 16
+  type: Annotated[enum_drm_vblank_seq_type, 0]
+  sequence: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  signal: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_wait_vblank_reply(c.Struct):
+  SIZE = 24
+  type: Annotated[enum_drm_vblank_seq_type, 0]
+  sequence: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  tval_sec: Annotated[Annotated[int, ctypes.c_int64], 8]
+  tval_usec: Annotated[Annotated[int, ctypes.c_int64], 16]
+@c.record
+class union_drm_wait_vblank(c.Struct):
+  SIZE = 24
+  request: Annotated[struct_drm_wait_vblank_request, 0]
+  reply: Annotated[struct_drm_wait_vblank_reply, 0]
+@c.record
+class struct_drm_modeset_ctl(c.Struct):
+  SIZE = 8
+  crtc: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  cmd: Annotated[Annotated[int, ctypes.c_uint32], 4]
+__u32: TypeAlias = Annotated[int, ctypes.c_uint32]
+@c.record
+class struct_drm_agp_mode(c.Struct):
+  SIZE = 8
+  mode: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class struct_drm_agp_buffer(c.Struct):
+  SIZE = 32
+  size: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  handle: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  type: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  physical: Annotated[Annotated[int, ctypes.c_uint64], 24]
+@c.record
+class struct_drm_agp_binding(c.Struct):
+  SIZE = 16
+  handle: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  offset: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_agp_info(c.Struct):
+  SIZE = 56
+  agp_version_major: Annotated[Annotated[int, ctypes.c_int32], 0]
+  agp_version_minor: Annotated[Annotated[int, ctypes.c_int32], 4]
+  mode: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  aperture_base: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  aperture_size: Annotated[Annotated[int, ctypes.c_uint64], 24]
+  memory_allowed: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  memory_used: Annotated[Annotated[int, ctypes.c_uint64], 40]
+  id_vendor: Annotated[Annotated[int, ctypes.c_uint16], 48]
+  id_device: Annotated[Annotated[int, ctypes.c_uint16], 50]
+@c.record
+class struct_drm_scatter_gather(c.Struct):
+  SIZE = 16
+  size: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  handle: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_set_version(c.Struct):
+  SIZE = 16
+  drm_di_major: Annotated[Annotated[int, ctypes.c_int32], 0]
+  drm_di_minor: Annotated[Annotated[int, ctypes.c_int32], 4]
+  drm_dd_major: Annotated[Annotated[int, ctypes.c_int32], 8]
+  drm_dd_minor: Annotated[Annotated[int, ctypes.c_int32], 12]
+@c.record
+class struct_drm_gem_close(c.Struct):
+  SIZE = 8
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_gem_flink(c.Struct):
+  SIZE = 8
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  name: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_gem_open(c.Struct):
+  SIZE = 16
+  name: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  size: Annotated[Annotated[int, ctypes.c_uint64], 8]
+__u64: TypeAlias = Annotated[int, ctypes.c_uint64]
+@c.record
+class struct_drm_get_cap(c.Struct):
+  SIZE = 16
+  capability: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  value: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_set_client_cap(c.Struct):
+  SIZE = 16
+  capability: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  value: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_prime_handle(c.Struct):
+  SIZE = 12
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  fd: Annotated[Annotated[int, ctypes.c_int32], 8]
+__s32: TypeAlias = Annotated[int, ctypes.c_int32]
+@c.record
+class struct_drm_syncobj_create(c.Struct):
+  SIZE = 8
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_syncobj_destroy(c.Struct):
+  SIZE = 8
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_syncobj_handle(c.Struct):
+  SIZE = 16
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  fd: Annotated[Annotated[int, ctypes.c_int32], 8]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_syncobj_transfer(c.Struct):
+  SIZE = 32
+  src_handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  dst_handle: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  src_point: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  dst_point: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 28]
+@c.record
+class struct_drm_syncobj_wait(c.Struct):
+  SIZE = 40
+  handles: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  timeout_nsec: Annotated[Annotated[int, ctypes.c_int64], 8]
+  count_handles: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  first_signaled: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 28]
+  deadline_nsec: Annotated[Annotated[int, ctypes.c_uint64], 32]
+__s64: TypeAlias = Annotated[int, ctypes.c_int64]
+@c.record
+class struct_drm_syncobj_timeline_wait(c.Struct):
+  SIZE = 48
+  handles: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  points: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  timeout_nsec: Annotated[Annotated[int, ctypes.c_int64], 16]
+  count_handles: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 28]
+  first_signaled: Annotated[Annotated[int, ctypes.c_uint32], 32]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 36]
+  deadline_nsec: Annotated[Annotated[int, ctypes.c_uint64], 40]
+@c.record
+class struct_drm_syncobj_eventfd(c.Struct):
+  SIZE = 24
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  point: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  fd: Annotated[Annotated[int, ctypes.c_int32], 16]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 20]
+@c.record
+class struct_drm_syncobj_array(c.Struct):
+  SIZE = 16
+  handles: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  count_handles: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_syncobj_timeline_array(c.Struct):
+  SIZE = 24
+  handles: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  points: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  count_handles: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 20]
+@c.record
+class struct_drm_crtc_get_sequence(c.Struct):
+  SIZE = 24
+  crtc_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  active: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  sequence: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  sequence_ns: Annotated[Annotated[int, ctypes.c_int64], 16]
+@c.record
+class struct_drm_crtc_queue_sequence(c.Struct):
+  SIZE = 24
+  crtc_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  sequence: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  user_data: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_event(c.Struct):
+  SIZE = 8
+  type: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  length: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_event_vblank(c.Struct):
+  SIZE = 32
+  base: Annotated[struct_drm_event, 0]
+  user_data: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  tv_sec: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  tv_usec: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  sequence: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  crtc_id: Annotated[Annotated[int, ctypes.c_uint32], 28]
+@c.record
+class struct_drm_event_crtc_sequence(c.Struct):
+  SIZE = 32
+  base: Annotated[struct_drm_event, 0]
+  user_data: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  time_ns: Annotated[Annotated[int, ctypes.c_int64], 16]
+  sequence: Annotated[Annotated[int, ctypes.c_uint64], 24]
+drm_clip_rect_t: TypeAlias = struct_drm_clip_rect
+drm_drawable_info_t: TypeAlias = struct_drm_drawable_info
+drm_tex_region_t: TypeAlias = struct_drm_tex_region
+drm_hw_lock_t: TypeAlias = struct_drm_hw_lock
+drm_version_t: TypeAlias = struct_drm_version
+drm_unique_t: TypeAlias = struct_drm_unique
+drm_list_t: TypeAlias = struct_drm_list
+drm_block_t: TypeAlias = struct_drm_block
+drm_control_t: TypeAlias = struct_drm_control
+drm_map_type_t: TypeAlias = enum_drm_map_type
+drm_map_flags_t: TypeAlias = enum_drm_map_flags
+drm_ctx_priv_map_t: TypeAlias = struct_drm_ctx_priv_map
+drm_map_t: TypeAlias = struct_drm_map
+drm_client_t: TypeAlias = struct_drm_client
+drm_stat_type_t: TypeAlias = enum_drm_stat_type
+drm_stats_t: TypeAlias = struct_drm_stats
+drm_lock_flags_t: TypeAlias = enum_drm_lock_flags
+drm_lock_t: TypeAlias = struct_drm_lock
+drm_dma_flags_t: TypeAlias = enum_drm_dma_flags
+drm_buf_desc_t: TypeAlias = struct_drm_buf_desc
+drm_buf_info_t: TypeAlias = struct_drm_buf_info
+drm_buf_free_t: TypeAlias = struct_drm_buf_free
+drm_buf_pub_t: TypeAlias = struct_drm_buf_pub
+drm_buf_map_t: TypeAlias = struct_drm_buf_map
+drm_dma_t: TypeAlias = struct_drm_dma
+drm_wait_vblank_t: TypeAlias = union_drm_wait_vblank
+drm_agp_mode_t: TypeAlias = struct_drm_agp_mode
+drm_ctx_flags_t: TypeAlias = enum_drm_ctx_flags
+drm_ctx_t: TypeAlias = struct_drm_ctx
+drm_ctx_res_t: TypeAlias = struct_drm_ctx_res
+drm_draw_t: TypeAlias = struct_drm_draw
+drm_update_draw_t: TypeAlias = struct_drm_update_draw
+drm_auth_t: TypeAlias = struct_drm_auth
+drm_irq_busid_t: TypeAlias = struct_drm_irq_busid
+drm_vblank_seq_type_t: TypeAlias = enum_drm_vblank_seq_type
+drm_agp_buffer_t: TypeAlias = struct_drm_agp_buffer
+drm_agp_binding_t: TypeAlias = struct_drm_agp_binding
+drm_agp_info_t: TypeAlias = struct_drm_agp_info
+drm_scatter_gather_t: TypeAlias = struct_drm_scatter_gather
+drm_set_version_t: TypeAlias = struct_drm_set_version
+@c.record
+class struct_drm_amdgpu_gem_create_in(c.Struct):
+  SIZE = 32
+  bo_size: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  alignment: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  domains: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  domain_flags: Annotated[Annotated[int, ctypes.c_uint64], 24]
+@c.record
+class struct_drm_amdgpu_gem_create_out(c.Struct):
+  SIZE = 8
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_gem_create(c.Struct):
+  SIZE = 32
+  _in: Annotated[struct_drm_amdgpu_gem_create_in, 0]
+  out: Annotated[struct_drm_amdgpu_gem_create_out, 0]
+@c.record
+class struct_drm_amdgpu_bo_list_in(c.Struct):
+  SIZE = 24
+  operation: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  list_handle: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  bo_number: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  bo_info_size: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  bo_info_ptr: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_amdgpu_bo_list_entry(c.Struct):
+  SIZE = 8
+  bo_handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  bo_priority: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_amdgpu_bo_list_out(c.Struct):
+  SIZE = 8
+  list_handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_bo_list(c.Struct):
+  SIZE = 24
+  _in: Annotated[struct_drm_amdgpu_bo_list_in, 0]
+  out: Annotated[struct_drm_amdgpu_bo_list_out, 0]
+@c.record
+class struct_drm_amdgpu_ctx_in(c.Struct):
+  SIZE = 16
+  op: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  priority: Annotated[Annotated[int, ctypes.c_int32], 12]
+@c.record
+class union_drm_amdgpu_ctx_out(c.Struct):
+  SIZE = 16
+  alloc: Annotated[union_drm_amdgpu_ctx_out_alloc, 0]
+  state: Annotated[union_drm_amdgpu_ctx_out_state, 0]
+  pstate: Annotated[union_drm_amdgpu_ctx_out_pstate, 0]
+@c.record
+class union_drm_amdgpu_ctx_out_alloc(c.Struct):
+  SIZE = 8
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_ctx_out_state(c.Struct):
+  SIZE = 16
+  flags: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  hangs: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  reset_status: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class union_drm_amdgpu_ctx_out_pstate(c.Struct):
+  SIZE = 8
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_ctx(c.Struct):
+  SIZE = 16
+  _in: Annotated[struct_drm_amdgpu_ctx_in, 0]
+  out: Annotated[union_drm_amdgpu_ctx_out, 0]
+@c.record
+class struct_drm_amdgpu_userq_in(c.Struct):
+  SIZE = 72
+  op: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  queue_id: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  ip_type: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  doorbell_handle: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  doorbell_offset: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  queue_va: Annotated[Annotated[int, ctypes.c_uint64], 24]
+  queue_size: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  rptr_va: Annotated[Annotated[int, ctypes.c_uint64], 40]
+  wptr_va: Annotated[Annotated[int, ctypes.c_uint64], 48]
+  mqd: Annotated[Annotated[int, ctypes.c_uint64], 56]
+  mqd_size: Annotated[Annotated[int, ctypes.c_uint64], 64]
+@c.record
+class struct_drm_amdgpu_userq_out(c.Struct):
+  SIZE = 8
+  queue_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_userq(c.Struct):
+  SIZE = 72
+  _in: Annotated[struct_drm_amdgpu_userq_in, 0]
+  out: Annotated[struct_drm_amdgpu_userq_out, 0]
+@c.record
+class struct_drm_amdgpu_userq_mqd_gfx11(c.Struct):
+  SIZE = 16
+  shadow_va: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  csa_va: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_amdgpu_userq_mqd_sdma_gfx11(c.Struct):
+  SIZE = 8
+  csa_va: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class struct_drm_amdgpu_userq_mqd_compute_gfx11(c.Struct):
+  SIZE = 8
+  eop_va: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class struct_drm_amdgpu_userq_signal(c.Struct):
+  SIZE = 48
+  queue_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  num_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  bo_read_handles: Annotated[Annotated[int, ctypes.c_uint64], 24]
+  bo_write_handles: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  num_bo_read_handles: Annotated[Annotated[int, ctypes.c_uint32], 40]
+  num_bo_write_handles: Annotated[Annotated[int, ctypes.c_uint32], 44]
+@c.record
+class struct_drm_amdgpu_userq_fence_info(c.Struct):
+  SIZE = 16
+  va: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  value: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_amdgpu_userq_wait(c.Struct):
+  SIZE = 72
+  waitq_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  syncobj_timeline_handles: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  syncobj_timeline_points: Annotated[Annotated[int, ctypes.c_uint64], 24]
+  bo_read_handles: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  bo_write_handles: Annotated[Annotated[int, ctypes.c_uint64], 40]
+  num_syncobj_timeline_handles: Annotated[Annotated[int, ctypes.c_uint16], 48]
+  num_fences: Annotated[Annotated[int, ctypes.c_uint16], 50]
+  num_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint32], 52]
+  num_bo_read_handles: Annotated[Annotated[int, ctypes.c_uint32], 56]
+  num_bo_write_handles: Annotated[Annotated[int, ctypes.c_uint32], 60]
+  out_fences: Annotated[Annotated[int, ctypes.c_uint64], 64]
+__u16: TypeAlias = Annotated[int, ctypes.c_uint16]
+class struct_drm_amdgpu_sem_in(ctypes.Structure): pass
+class union_drm_amdgpu_sem_out(ctypes.Union): pass
+class union_drm_amdgpu_sem(ctypes.Union): pass
+@c.record
+class struct_drm_amdgpu_vm_in(c.Struct):
+  SIZE = 8
+  op: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_amdgpu_vm_out(c.Struct):
+  SIZE = 8
+  flags: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class union_drm_amdgpu_vm(c.Struct):
+  SIZE = 8
+  _in: Annotated[struct_drm_amdgpu_vm_in, 0]
+  out: Annotated[struct_drm_amdgpu_vm_out, 0]
+@c.record
+class struct_drm_amdgpu_sched_in(c.Struct):
+  SIZE = 16
+  op: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  fd: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  priority: Annotated[Annotated[int, ctypes.c_int32], 8]
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class union_drm_amdgpu_sched(c.Struct):
+  SIZE = 16
+  _in: Annotated[struct_drm_amdgpu_sched_in, 0]
+@c.record
+class struct_drm_amdgpu_gem_userptr(c.Struct):
+  SIZE = 24
+  addr: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  size: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 20]
+@c.record
+class struct_drm_amdgpu_gem_dgma(c.Struct):
+  SIZE = 24
+  addr: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  size: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  op: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 20]
+@c.record
+class struct_drm_amdgpu_gem_metadata(c.Struct):
+  SIZE = 288
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  op: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  data: Annotated[struct_drm_amdgpu_gem_metadata_data, 8]
+@c.record
+class struct_drm_amdgpu_gem_metadata_data(c.Struct):
+  SIZE = 280
+  flags: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  tiling_info: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  data_size_bytes: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  data: Annotated[c.Array[Annotated[int, ctypes.c_uint32], Literal[64]], 20]
+@c.record
+class struct_drm_amdgpu_gem_mmap_in(c.Struct):
+  SIZE = 8
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_amdgpu_gem_mmap_out(c.Struct):
+  SIZE = 8
+  addr_ptr: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class union_drm_amdgpu_gem_mmap(c.Struct):
+  SIZE = 8
+  _in: Annotated[struct_drm_amdgpu_gem_mmap_in, 0]
+  out: Annotated[struct_drm_amdgpu_gem_mmap_out, 0]
+@c.record
+class struct_drm_amdgpu_gem_wait_idle_in(c.Struct):  # `_in` view of union_drm_amdgpu_gem_wait_idle (declared below)
+  SIZE = 16  # 2 x u32 + 1 x u64, fields are contiguous
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  timeout: Annotated[Annotated[int, ctypes.c_uint64], 8]  # unit of the timeout is not visible in this file
+@c.record
+class struct_drm_amdgpu_gem_wait_idle_out(c.Struct):  # `out` view of union_drm_amdgpu_gem_wait_idle (declared below)
+  SIZE = 8  # 2 x u32
+  status: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  domain: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_gem_wait_idle(c.Struct):  # type handed to DRM_IOWR when DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE is built later in this file
+  SIZE = 16  # size of the larger view (`_in` is 16 bytes, `out` is 8)
+  _in: Annotated[struct_drm_amdgpu_gem_wait_idle_in, 0]  # `_in` presumably stands for a C member named `in`, a Python keyword -- confirm against the header
+  out: Annotated[struct_drm_amdgpu_gem_wait_idle_out, 0]  # overlaps the first 8 bytes of `_in`
+@c.record
+class struct_drm_amdgpu_wait_cs_in(c.Struct):  # `_in` view of union_drm_amdgpu_wait_cs (declared below)
+  SIZE = 32  # 2 x u64 + 4 x u32, fields are contiguous
+  handle: Annotated[Annotated[int, ctypes.c_uint64], 0]  # 64-bit here, unlike the 32-bit `handle` fields of the GEM records above
+  timeout: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  ip_type: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  ring: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 28]
+@c.record
+class struct_drm_amdgpu_wait_cs_out(c.Struct):  # `out` view of union_drm_amdgpu_wait_cs (declared below)
+  SIZE = 8  # a single u64
+  status: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class union_drm_amdgpu_wait_cs(c.Struct):  # type handed to DRM_IOWR when DRM_IOCTL_AMDGPU_WAIT_CS is built later in this file
+  SIZE = 32  # size of the larger view (`_in` is 32 bytes, `out` is 8)
+  _in: Annotated[struct_drm_amdgpu_wait_cs_in, 0]  # `_in` presumably stands for a C member named `in`, a Python keyword -- confirm against the header
+  out: Annotated[struct_drm_amdgpu_wait_cs_out, 0]  # overlaps the first 8 bytes of `_in`
+@c.record
+class struct_drm_amdgpu_fence(c.Struct):  # embedded as the `fence` member of union_drm_amdgpu_fence_to_handle_in later in this file
+  SIZE = 24  # 4 x u32 + 1 x u64, fields are contiguous
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  ip_type: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  ring: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  seq_no: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_amdgpu_wait_fences_in(c.Struct):  # `_in` view of union_drm_amdgpu_wait_fences (declared below)
+  SIZE = 24  # u64 + 2 x u32 + u64, fields are contiguous
+  fences: Annotated[Annotated[int, ctypes.c_uint64], 0]  # NOTE(review): a plain u64, not an array type; likely an address of `fence_count` fence records -- confirm against the header
+  fence_count: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  wait_all: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  timeout_ns: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_amdgpu_wait_fences_out(c.Struct):  # `out` view of union_drm_amdgpu_wait_fences (declared below)
+  SIZE = 8  # 2 x u32
+  status: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  first_signaled: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class union_drm_amdgpu_wait_fences(c.Struct):  # type handed to DRM_IOWR when DRM_IOCTL_AMDGPU_WAIT_FENCES is built later in this file
+  SIZE = 24  # size of the larger view (`_in` is 24 bytes, `out` is 8)
+  _in: Annotated[struct_drm_amdgpu_wait_fences_in, 0]  # `_in` presumably stands for a C member named `in`, a Python keyword -- confirm against the header
+  out: Annotated[struct_drm_amdgpu_wait_fences_out, 0]  # overlaps the first 8 bytes of `_in`
+@c.record
+class struct_drm_amdgpu_gem_op(c.Struct):  # type handed to DRM_IOWR when DRM_IOCTL_AMDGPU_GEM_OP is built later in this file
+  SIZE = 16  # 2 x u32 + 1 x u64, fields are contiguous
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  op: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  value: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class struct_drm_amdgpu_gem_va(c.Struct):  # type handed to DRM_IOW (not DRM_IOWR) when DRM_IOCTL_AMDGPU_GEM_VA is built later in this file
+  SIZE = 64  # fields are contiguous from 0 to 64, no implicit padding
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 4]  # explicit pad member declared between handle and operation
+  operation: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  va_address: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  offset_in_bo: Annotated[Annotated[int, ctypes.c_uint64], 24]
+  map_size: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  vm_timeline_point: Annotated[Annotated[int, ctypes.c_uint64], 40]
+  vm_timeline_syncobj_out: Annotated[Annotated[int, ctypes.c_uint32], 48]
+  num_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint32], 52]
+  input_fence_syncobj_handles: Annotated[Annotated[int, ctypes.c_uint64], 56]  # NOTE(review): a plain u64, not an array type; likely an address of `num_syncobj_handles` handles -- confirm against the header
+@c.record
+class struct_drm_amdgpu_cs_chunk(c.Struct):  # 16-byte chunk header record: id, length, and a 64-bit data word
+  SIZE = 16  # 2 x u32 + 1 x u64, fields are contiguous
+  chunk_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  length_dw: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  chunk_data: Annotated[Annotated[int, ctypes.c_uint64], 8]  # NOTE(review): a plain u64, not a nested record; whether it carries a user-space address is not visible here
+@c.record
+class struct_drm_amdgpu_cs_in(c.Struct):  # `_in` view of union_drm_amdgpu_cs (declared below)
+  SIZE = 24  # 4 x u32 + 1 x u64, fields are contiguous
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  bo_list_handle: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  num_chunks: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  chunks: Annotated[Annotated[int, ctypes.c_uint64], 16]  # NOTE(review): a plain u64, not an array type; likely an address related to `num_chunks` entries -- confirm against the header
+@c.record
+class struct_drm_amdgpu_cs_out(c.Struct):  # `out` view of union_drm_amdgpu_cs (declared below)
+  SIZE = 8  # a single u64
+  handle: Annotated[Annotated[int, ctypes.c_uint64], 0]
+@c.record
+class union_drm_amdgpu_cs(c.Struct):  # type handed to DRM_IOWR when DRM_IOCTL_AMDGPU_CS is built later in this file
+  SIZE = 24  # size of the larger view (`_in` is 24 bytes, `out` is 8)
+  _in: Annotated[struct_drm_amdgpu_cs_in, 0]  # `_in` presumably stands for a C member named `in`, a Python keyword -- confirm against the header
+  out: Annotated[struct_drm_amdgpu_cs_out, 0]  # overlaps the first 8 bytes of `_in`
+@c.record
+class struct_drm_amdgpu_cs_chunk_ib(c.Struct):  # used as the `ib_data` member of struct_drm_amdgpu_cs_chunk_data later in this file
+  SIZE = 32  # fields are contiguous from 0 to 32
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 0]  # explicit pad member is the first field of this record
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  va_start: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  ib_bytes: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  ip_type: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  ring: Annotated[Annotated[int, ctypes.c_uint32], 28]
+@c.record
+class struct_drm_amdgpu_cs_chunk_dep(c.Struct):  # 24-byte record: ip_type/ip_instance/ring/ctx_id selectors followed by a 64-bit handle
+  SIZE = 24  # 4 x u32 + 1 x u64, fields are contiguous
+  ip_type: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  ring: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  ctx_id: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  handle: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_amdgpu_cs_chunk_fence(c.Struct):  # used as the `fence_data` member of struct_drm_amdgpu_cs_chunk_data later in this file
+  SIZE = 8  # 2 x u32
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  offset: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_amdgpu_cs_chunk_sem(c.Struct):  # smallest chunk record in this group: one 32-bit handle
+  SIZE = 4  # a single u32
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+@c.record
+class struct_drm_amdgpu_cs_chunk_syncobj(c.Struct):  # 16-byte record: handle and flags followed by a 64-bit point value
+  SIZE = 16  # 2 x u32 + 1 x u64, fields are contiguous
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  flags: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  point: Annotated[Annotated[int, ctypes.c_uint64], 8]
+@c.record
+class union_drm_amdgpu_fence_to_handle(c.Struct):  # type handed to DRM_IOWR when DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE is built later in this file
+  SIZE = 32  # size of the larger view (`_in` is 32 bytes, `out` is 4)
+  _in: Annotated[union_drm_amdgpu_fence_to_handle_in, 0]  # forward reference (type declared just below); `_in` presumably stands for a C member named `in`, a Python keyword -- confirm
+  out: Annotated[union_drm_amdgpu_fence_to_handle_out, 0]  # forward reference as well; overlaps the first 4 bytes of `_in`
+@c.record
+class union_drm_amdgpu_fence_to_handle_in(c.Struct):  # despite the generated `union_` prefix the fields sit at distinct offsets (0, 24, 28), i.e. a sequential layout
+  SIZE = 32  # 24-byte fence + 2 x u32
+  fence: Annotated[struct_drm_amdgpu_fence, 0]  # nested 24-byte record declared earlier in this file
+  what: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 28]
+@c.record
+class union_drm_amdgpu_fence_to_handle_out(c.Struct):  # `out` view of union_drm_amdgpu_fence_to_handle; carries the generated `union_` prefix but has a single field
+  SIZE = 4  # a single u32
+  handle: Annotated[Annotated[int, ctypes.c_uint32], 0]
+@c.record
+class struct_drm_amdgpu_cs_chunk_data(c.Struct):  # named `struct_`, but both members are placed at offset 0, so they overlap like union members
+  SIZE = 32  # size of the larger member (ib_data is 32 bytes, fence_data is 8)
+  ib_data: Annotated[struct_drm_amdgpu_cs_chunk_ib, 0]
+  fence_data: Annotated[struct_drm_amdgpu_cs_chunk_fence, 0]  # shares its 8 bytes with the start of ib_data
+@c.record
+class struct_drm_amdgpu_cs_chunk_cp_gfx_shadow(c.Struct):  # 32-byte record: three 64-bit *_va values followed by 64-bit flags
+  SIZE = 32  # 4 x u64, fields are contiguous
+  shadow_va: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  csa_va: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  gds_va: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  flags: Annotated[Annotated[int, ctypes.c_uint64], 24]  # 64-bit flags here; most other records in this file use a 32-bit flags field
+@c.record
+class struct_drm_amdgpu_query_fw(c.Struct):  # 16-byte record: fw_type, ip_instance and index selectors plus an explicit pad
+  SIZE = 16  # 4 x u32, fields are contiguous
+  fw_type: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  ip_instance: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  index: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 12]  # explicit pad member rounding the record up to 16 bytes
+@c.record
+class struct_drm_amdgpu_info(c.Struct):  # type handed to DRM_IOW (not DRM_IOWR) when DRM_IOCTL_AMDGPU_INFO is built later in this file
+  SIZE = 16  # only the first 16 bytes are declared here: 1 x u64 + 2 x u32
+  return_pointer: Annotated[Annotated[int, ctypes.c_uint64], 0]  # NOTE(review): a plain u64; by its name an address that receives the result, sized by return_size -- confirm against the header
+  return_size: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  query: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_amdgpu_info_gds(c.Struct):  # 32-byte record of eight 32-bit values (GDS/GWS/OA partition figures plus a pad)
+  SIZE = 32  # 8 x u32, fields are contiguous
+  gds_gfx_partition_size: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  compute_partition_size: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  gds_total_size: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  gws_per_gfx_partition: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  gws_per_compute_partition: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  oa_per_gfx_partition: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  oa_per_compute_partition: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  _pad: Annotated[Annotated[int, ctypes.c_uint32], 28]  # explicit pad member rounding the record up to 32 bytes
+@c.record
+class struct_drm_amdgpu_info_vram_gtt(c.Struct):  # 24-byte record of three 64-bit size values
+  SIZE = 24  # 3 x u64, fields are contiguous
+  vram_size: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  vram_cpu_accessible_size: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  gtt_size: Annotated[Annotated[int, ctypes.c_uint64], 16]
+@c.record
+class struct_drm_amdgpu_heap_info(c.Struct):  # per-heap record; embedded three times (vram, cpu_accessible_vram, gtt) by struct_drm_amdgpu_memory_info below
+  SIZE = 32  # 4 x u64, fields are contiguous
+  total_heap_size: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  usable_heap_size: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  heap_usage: Annotated[Annotated[int, ctypes.c_uint64], 16]
+  max_allocation: Annotated[Annotated[int, ctypes.c_uint64], 24]
+@c.record
+class struct_drm_amdgpu_memory_info(c.Struct):  # three back-to-back struct_drm_amdgpu_heap_info records
+  SIZE = 96  # 3 x 32 bytes
+  vram: Annotated[struct_drm_amdgpu_heap_info, 0]
+  cpu_accessible_vram: Annotated[struct_drm_amdgpu_heap_info, 32]
+  gtt: Annotated[struct_drm_amdgpu_heap_info, 64]
+@c.record
+class struct_drm_amdgpu_info_firmware(c.Struct):  # 8-byte record: two 32-bit values, `ver` and `feature`
+  SIZE = 8  # 2 x u32
+  ver: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  feature: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_amdgpu_info_vbios(c.Struct):  # 200-byte record mixing fixed-size byte arrays with two 32-bit values
+  SIZE = 200  # 64 + 64 + 4 + 4 + 32 + 32, fields are contiguous
+  name: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[64]], 0]  # array of 64 unsigned bytes (elements are ints, not a Python str); text encoding/termination is not visible here
+  vbios_pn: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[64]], 64]
+  version: Annotated[Annotated[int, ctypes.c_uint32], 128]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 132]
+  vbios_ver_str: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[32]], 136]
+  date: Annotated[c.Array[Annotated[int, ctypes.c_ubyte], Literal[32]], 168]
+__u8: TypeAlias = Annotated[int, ctypes.c_ubyte]  # alias for a Python int tagged with ctypes.c_ubyte; the c_ubyte arrays in struct_drm_amdgpu_info_vbios above spell the same annotation inline
+@c.record
+class struct_drm_amdgpu_info_device(c.Struct):  # largest record in this part of the file: device-wide properties as fixed-offset u32/u64 fields
+  SIZE = 448  # last declared field ends at byte 444; the remaining 4 bytes are tail padding
+  device_id: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  chip_rev: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  external_rev: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  pci_rev: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  family: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  num_shader_engines: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  num_shader_arrays_per_engine: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  gpu_counter_freq: Annotated[Annotated[int, ctypes.c_uint32], 28]
+  max_engine_clock: Annotated[Annotated[int, ctypes.c_uint64], 32]
+  max_memory_clock: Annotated[Annotated[int, ctypes.c_uint64], 40]
+  cu_active_number: Annotated[Annotated[int, ctypes.c_uint32], 48]
+  cu_ao_mask: Annotated[Annotated[int, ctypes.c_uint32], 52]
+  cu_bitmap: Annotated[c.Array[c.Array[Annotated[int, ctypes.c_uint32], Literal[4]], Literal[4]], 56]  # 4 x 4 array of u32 = 64 bytes, spanning offsets 56..119; meaning of the two indices is not visible here
+  enabled_rb_pipes_mask: Annotated[Annotated[int, ctypes.c_uint32], 120]
+  num_rb_pipes: Annotated[Annotated[int, ctypes.c_uint32], 124]
+  num_hw_gfx_contexts: Annotated[Annotated[int, ctypes.c_uint32], 128]
+  pcie_gen: Annotated[Annotated[int, ctypes.c_uint32], 132]
+  ids_flags: Annotated[Annotated[int, ctypes.c_uint64], 136]
+  virtual_address_offset: Annotated[Annotated[int, ctypes.c_uint64], 144]
+  virtual_address_max: Annotated[Annotated[int, ctypes.c_uint64], 152]
+  virtual_address_alignment: Annotated[Annotated[int, ctypes.c_uint32], 160]
+  pte_fragment_size: Annotated[Annotated[int, ctypes.c_uint32], 164]
+  gart_page_size: Annotated[Annotated[int, ctypes.c_uint32], 168]
+  ce_ram_size: Annotated[Annotated[int, ctypes.c_uint32], 172]
+  vram_type: Annotated[Annotated[int, ctypes.c_uint32], 176]
+  vram_bit_width: Annotated[Annotated[int, ctypes.c_uint32], 180]
+  vce_harvest_config: Annotated[Annotated[int, ctypes.c_uint32], 184]
+  gc_double_offchip_lds_buf: Annotated[Annotated[int, ctypes.c_uint32], 188]
+  prim_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 192]
+  pos_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 200]
+  cntl_sb_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 208]
+  param_buf_gpu_addr: Annotated[Annotated[int, ctypes.c_uint64], 216]
+  prim_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 224]
+  pos_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 228]
+  cntl_sb_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 232]
+  param_buf_size: Annotated[Annotated[int, ctypes.c_uint32], 236]
+  wave_front_size: Annotated[Annotated[int, ctypes.c_uint32], 240]
+  num_shader_visible_vgprs: Annotated[Annotated[int, ctypes.c_uint32], 244]
+  num_cu_per_sh: Annotated[Annotated[int, ctypes.c_uint32], 248]
+  num_tcc_blocks: Annotated[Annotated[int, ctypes.c_uint32], 252]
+  gs_vgt_table_depth: Annotated[Annotated[int, ctypes.c_uint32], 256]
+  gs_prim_buffer_depth: Annotated[Annotated[int, ctypes.c_uint32], 260]
+  max_gs_waves_per_vgt: Annotated[Annotated[int, ctypes.c_uint32], 264]
+  pcie_num_lanes: Annotated[Annotated[int, ctypes.c_uint32], 268]
+  cu_ao_bitmap: Annotated[c.Array[c.Array[Annotated[int, ctypes.c_uint32], Literal[4]], Literal[4]], 272]  # same 4 x 4 u32 shape as cu_bitmap, spanning offsets 272..335
+  high_va_offset: Annotated[Annotated[int, ctypes.c_uint64], 336]
+  high_va_max: Annotated[Annotated[int, ctypes.c_uint64], 344]
+  pa_sc_tile_steering_override: Annotated[Annotated[int, ctypes.c_uint32], 352]  # u32 ending at 356; bytes 356..359 are an undeclared gap before the next u64
+  tcc_disabled_mask: Annotated[Annotated[int, ctypes.c_uint64], 360]
+  min_engine_clock: Annotated[Annotated[int, ctypes.c_uint64], 368]
+  min_memory_clock: Annotated[Annotated[int, ctypes.c_uint64], 376]
+  tcp_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 384]
+  num_sqc_per_wgp: Annotated[Annotated[int, ctypes.c_uint32], 388]
+  sqc_data_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 392]
+  sqc_inst_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 396]
+  gl1c_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 400]
+  gl2c_cache_size: Annotated[Annotated[int, ctypes.c_uint32], 404]
+  mall_size: Annotated[Annotated[int, ctypes.c_uint64], 408]
+  enabled_rb_pipes_mask_hi: Annotated[Annotated[int, ctypes.c_uint32], 416]  # presumably the upper half paired with enabled_rb_pipes_mask at offset 120 -- confirm against the header
+  shadow_size: Annotated[Annotated[int, ctypes.c_uint32], 420]
+  shadow_alignment: Annotated[Annotated[int, ctypes.c_uint32], 424]
+  csa_size: Annotated[Annotated[int, ctypes.c_uint32], 428]
+  csa_alignment: Annotated[Annotated[int, ctypes.c_uint32], 432]
+  userq_ip_mask: Annotated[Annotated[int, ctypes.c_uint32], 436]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 440]  # explicit pad member; SIZE still extends 4 bytes past it
+@c.record
+class struct_drm_amdgpu_info_hw_ip(c.Struct):  # 32-byte record: version numbers, 64-bit capability flags, IB alignment values and ring mask fields
+  SIZE = 32  # 2 x u32 + 1 x u64 + 4 x u32, fields are contiguous
+  hw_ip_version_major: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  hw_ip_version_minor: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  capabilities_flags: Annotated[Annotated[int, ctypes.c_uint64], 8]
+  ib_start_alignment: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  ib_size_alignment: Annotated[Annotated[int, ctypes.c_uint32], 20]
+  available_rings: Annotated[Annotated[int, ctypes.c_uint32], 24]
+  ip_discovery_version: Annotated[Annotated[int, ctypes.c_uint32], 28]
+@c.record
+class struct_drm_amdgpu_info_uq_fw_areas_gfx(c.Struct):  # `gfx` member type of struct_drm_amdgpu_info_uq_fw_areas (declared below)
+  SIZE = 16  # 4 x u32, fields are contiguous
+  shadow_size: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  shadow_alignment: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  csa_size: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  csa_alignment: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_amdgpu_info_uq_fw_areas(c.Struct):  # wrapper record whose only declared member is the 16-byte `gfx` record
+  SIZE = 16  # same size as its single member
+  gfx: Annotated[struct_drm_amdgpu_info_uq_fw_areas_gfx, 0]
+@c.record
+class struct_drm_amdgpu_info_num_handles(c.Struct):  # 8-byte record: two 32-bit UVD handle counters (max and used)
+  SIZE = 8  # 2 x u32
+  uvd_max_handles: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  uvd_used_handles: Annotated[Annotated[int, ctypes.c_uint32], 4]
+@c.record
+class struct_drm_amdgpu_info_vce_clock_table_entry(c.Struct):  # element type of the `entries` array in struct_drm_amdgpu_info_vce_clock_table (declared below)
+  SIZE = 16  # 4 x u32, fields are contiguous
+  sclk: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  mclk: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  eclk: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_amdgpu_info_vce_clock_table(c.Struct):  # fixed-capacity table: six entry slots followed by a count and a pad
+  SIZE = 104  # 6 x 16-byte entries (96) + 2 x u32
+  entries: Annotated[c.Array[struct_drm_amdgpu_info_vce_clock_table_entry, Literal[6]], 0]  # capacity is always 6 slots
+  num_valid_entries: Annotated[Annotated[int, ctypes.c_uint32], 96]  # presumably how many of the 6 slots are populated -- confirm against the header
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 100]
+@c.record
+class struct_drm_amdgpu_info_video_codec_info(c.Struct):  # element type of the `codec_info` array in struct_drm_amdgpu_info_video_caps (declared below)
+  SIZE = 24  # 6 x u32, fields are contiguous
+  valid: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  max_width: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  max_height: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  max_pixels_per_frame: Annotated[Annotated[int, ctypes.c_uint32], 12]
+  max_level: Annotated[Annotated[int, ctypes.c_uint32], 16]
+  pad: Annotated[Annotated[int, ctypes.c_uint32], 20]
+@c.record
+class struct_drm_amdgpu_info_video_caps(c.Struct):  # fixed array of eight codec-info records and nothing else
+  SIZE = 192  # 8 x 24 bytes
+  codec_info: Annotated[c.Array[struct_drm_amdgpu_info_video_codec_info, Literal[8]], 0]  # which codec each of the 8 indices stands for is not visible in this part of the file
+@c.record
+class struct_drm_amdgpu_info_gpuvm_fault(c.Struct):  # 16-byte record: 64-bit address followed by 32-bit status and vmhub values
+  SIZE = 16  # 1 x u64 + 2 x u32, fields are contiguous
+  addr: Annotated[Annotated[int, ctypes.c_uint64], 0]
+  status: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  vmhub: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_amdgpu_info_uq_metadata_gfx(c.Struct):  # `gfx` member type of struct_drm_amdgpu_info_uq_metadata; field names and offsets match struct_drm_amdgpu_info_uq_fw_areas_gfx above
+  SIZE = 16  # 4 x u32, fields are contiguous
+  shadow_size: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  shadow_alignment: Annotated[Annotated[int, ctypes.c_uint32], 4]
+  csa_size: Annotated[Annotated[int, ctypes.c_uint32], 8]
+  csa_alignment: Annotated[Annotated[int, ctypes.c_uint32], 12]
+@c.record
+class struct_drm_amdgpu_info_uq_metadata(c.Struct):  # wrapper record whose only declared member is the 16-byte `gfx` record
+  SIZE = 16  # same size as its single member
+  gfx: Annotated[struct_drm_amdgpu_info_uq_metadata_gfx, 0]
+class _anonstruct0(ctypes.Structure): pass  # placeholder: a bare ctypes.Structure with no fields, no SIZE and no @c.record, unlike the records around it; presumably an anonymous C type the generator could not lay out -- confirm
+class struct_drm_amdgpu_virtual_range(ctypes.Structure): pass  # placeholder: a bare ctypes.Structure with no fields declared here, so instances expose no members and occupy zero bytes
+@c.record
+class struct_drm_amdgpu_capability(c.Struct):  # 8-byte record: a 32-bit flag word and a 32-bit direct_gma_size value
+  SIZE = 8  # 2 x u32
+  flag: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  direct_gma_size: Annotated[Annotated[int, ctypes.c_uint32], 4]  # unit of this size is not visible in this file
+@c.record
+class struct_drm_amdgpu_freesync(c.Struct):  # 32-byte record: one op word followed by seven spare words
+  SIZE = 32  # 4 + 7 x 4 bytes
+  op: Annotated[Annotated[int, ctypes.c_uint32], 0]
+  spare: Annotated[c.Array[Annotated[int, ctypes.c_uint32], Literal[7]], 4]  # 7 x u32 = 28 bytes filling the rest of the record
+c.init_records()  # called once, after every @c.record class above has been declared; presumably finalizes them (e.g. resolving field types that are named before their definition) -- confirm in the `c` helper module
+DRM_NAME = "drm" # type: ignore
+DRM_MIN_ORDER = 5 # type: ignore
+DRM_MAX_ORDER = 22 # type: ignore
+DRM_RAM_PERCENT = 10 # type: ignore
+_DRM_LOCK_HELD = 0x80000000 # type: ignore # bit 31
+_DRM_LOCK_CONT = 0x40000000 # type: ignore # bit 30
+_DRM_LOCK_IS_HELD = lambda lock: ((lock) & _DRM_LOCK_HELD) # type: ignore # returns the masked bit (0 or 0x80000000), not a bool
+_DRM_LOCK_IS_CONT = lambda lock: ((lock) & _DRM_LOCK_CONT) # type: ignore # returns the masked bit (0 or 0x40000000), not a bool
+_DRM_LOCKING_CONTEXT = lambda lock: ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) # type: ignore # clears the two flag bits above and keeps every other bit of `lock`
+_DRM_VBLANK_HIGH_CRTC_SHIFT = 1 # type: ignore
+_DRM_VBLANK_TYPES_MASK = (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) # type: ignore # evaluated at import: the _DRM_VBLANK_* operands are not defined in this part of the file and must already be bound
+_DRM_VBLANK_FLAGS_MASK = (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) # type: ignore # same: operands come from outside this part of the file
+_DRM_PRE_MODESET = 1 # type: ignore
+_DRM_POST_MODESET = 2 # type: ignore
+DRM_CAP_DUMB_BUFFER = 0x1 # type: ignore # DRM_CAP_* are sequential ids written in hex, not bit flags (0x3 and 0x5 both appear)
+DRM_CAP_VBLANK_HIGH_CRTC = 0x2 # type: ignore
+DRM_CAP_DUMB_PREFERRED_DEPTH = 0x3 # type: ignore
+DRM_CAP_DUMB_PREFER_SHADOW = 0x4 # type: ignore
+DRM_CAP_PRIME = 0x5 # type: ignore
+DRM_PRIME_CAP_IMPORT = 0x1 # type: ignore # separate DRM_PRIME_CAP_* namespace interleaved here; values 0x1/0x2 are distinct bits
+DRM_PRIME_CAP_EXPORT = 0x2 # type: ignore
+DRM_CAP_TIMESTAMP_MONOTONIC = 0x6 # type: ignore
+DRM_CAP_ASYNC_PAGE_FLIP = 0x7 # type: ignore
+DRM_CAP_CURSOR_WIDTH = 0x8 # type: ignore
+DRM_CAP_CURSOR_HEIGHT = 0x9 # type: ignore
+DRM_CAP_ADDFB2_MODIFIERS = 0x10 # type: ignore # hex 0x10 == 16: ids 0xa..0xf are not assigned in this list
+DRM_CAP_PAGE_FLIP_TARGET = 0x11 # type: ignore
+DRM_CAP_CRTC_IN_VBLANK_EVENT = 0x12 # type: ignore
+DRM_CAP_SYNCOBJ = 0x13 # type: ignore
+DRM_CAP_SYNCOBJ_TIMELINE = 0x14 # type: ignore
+DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP = 0x15 # type: ignore
+DRM_CLIENT_CAP_STEREO_3D = 1 # type: ignore # DRM_CLIENT_CAP_* are sequential ids 1..6
+DRM_CLIENT_CAP_UNIVERSAL_PLANES = 2 # type: ignore
+DRM_CLIENT_CAP_ATOMIC = 3 # type: ignore
+DRM_CLIENT_CAP_ASPECT_RATIO = 4 # type: ignore
+DRM_CLIENT_CAP_WRITEBACK_CONNECTORS = 5 # type: ignore
+DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT = 6 # type: ignore
+DRM_SYNCOBJ_CREATE_SIGNALED = (1 << 0) # type: ignore # DRM_SYNCOBJ_* below are single-bit flags; each name prefix is its own flag namespace, so bit 0 repeats
+DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE = (1 << 0) # type: ignore
+DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE = (1 << 0) # type: ignore
+DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL = (1 << 0) # type: ignore
+DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT = (1 << 1) # type: ignore
+DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE = (1 << 2) # type: ignore
+DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE = (1 << 3) # type: ignore
+DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED = (1 << 0) # type: ignore
+DRM_CRTC_SEQUENCE_RELATIVE = 0x00000001 # type: ignore
+DRM_CRTC_SEQUENCE_NEXT_ON_MISS = 0x00000002 # type: ignore
+DRM_IOCTL_BASE = 'd' # type: ignore # one-character string passed as the first argument of every helper below
+DRM_IO = lambda nr: _IO(DRM_IOCTL_BASE,nr) # type: ignore # thin wrappers: _IO/_IOR/_IOW/_IOWR are not defined in this part of the file and must be bound before the first DRM_IOCTL_* line below runs at import
+DRM_IOR = lambda nr,type: _IOR(DRM_IOCTL_BASE,nr,type) # type: ignore # parameter `type` shadows the builtin, but only inside this lambda
+DRM_IOW = lambda nr,type: _IOW(DRM_IOCTL_BASE,nr,type) # type: ignore
+DRM_IOWR = lambda nr,type: _IOWR(DRM_IOCTL_BASE,nr,type) # type: ignore
+DRM_IOCTL_VERSION = DRM_IOWR(0x00, struct_drm_version) # type: ignore # core DRM ioctl table: each line calls a DRM_IO* helper at import time; the struct_drm_* / union_drm_* argument types are declared outside this part of the file
+DRM_IOCTL_GET_UNIQUE = DRM_IOWR(0x01, struct_drm_unique) # type: ignore
+DRM_IOCTL_GET_MAGIC = DRM_IOR( 0x02, struct_drm_auth) # type: ignore # the irregular spacing inside the call parentheses in this table is cosmetic only
+DRM_IOCTL_IRQ_BUSID = DRM_IOWR(0x03, struct_drm_irq_busid) # type: ignore
+DRM_IOCTL_GET_MAP = DRM_IOWR(0x04, struct_drm_map) # type: ignore
+DRM_IOCTL_GET_CLIENT = DRM_IOWR(0x05, struct_drm_client) # type: ignore
+DRM_IOCTL_GET_STATS = DRM_IOR( 0x06, struct_drm_stats) # type: ignore
+DRM_IOCTL_SET_VERSION = DRM_IOWR(0x07, struct_drm_set_version) # type: ignore
+DRM_IOCTL_MODESET_CTL = DRM_IOW(0x08, struct_drm_modeset_ctl) # type: ignore
+DRM_IOCTL_GEM_CLOSE = DRM_IOW (0x09, struct_drm_gem_close) # type: ignore
+DRM_IOCTL_GEM_FLINK = DRM_IOWR(0x0a, struct_drm_gem_flink) # type: ignore
+DRM_IOCTL_GEM_OPEN = DRM_IOWR(0x0b, struct_drm_gem_open) # type: ignore
+DRM_IOCTL_GET_CAP = DRM_IOWR(0x0c, struct_drm_get_cap) # type: ignore
+DRM_IOCTL_SET_CLIENT_CAP = DRM_IOW( 0x0d, struct_drm_set_client_cap) # type: ignore
+DRM_IOCTL_SET_UNIQUE = DRM_IOW( 0x10, struct_drm_unique) # type: ignore # numbers 0x0e and 0x0f are not assigned in this table
+DRM_IOCTL_AUTH_MAGIC = DRM_IOW( 0x11, struct_drm_auth) # type: ignore
+DRM_IOCTL_BLOCK = DRM_IOWR(0x12, struct_drm_block) # type: ignore
+DRM_IOCTL_UNBLOCK = DRM_IOWR(0x13, struct_drm_block) # type: ignore
+DRM_IOCTL_CONTROL = DRM_IOW( 0x14, struct_drm_control) # type: ignore
+DRM_IOCTL_ADD_MAP = DRM_IOWR(0x15, struct_drm_map) # type: ignore
+DRM_IOCTL_ADD_BUFS = DRM_IOWR(0x16, struct_drm_buf_desc) # type: ignore
+DRM_IOCTL_MARK_BUFS = DRM_IOW( 0x17, struct_drm_buf_desc) # type: ignore
+DRM_IOCTL_INFO_BUFS = DRM_IOWR(0x18, struct_drm_buf_info) # type: ignore
+DRM_IOCTL_MAP_BUFS = DRM_IOWR(0x19, struct_drm_buf_map) # type: ignore
+DRM_IOCTL_FREE_BUFS = DRM_IOW( 0x1a, struct_drm_buf_free) # type: ignore
+DRM_IOCTL_RM_MAP = DRM_IOW( 0x1b, struct_drm_map) # type: ignore
+DRM_IOCTL_SET_SAREA_CTX = DRM_IOW( 0x1c, struct_drm_ctx_priv_map) # type: ignore
+DRM_IOCTL_GET_SAREA_CTX = DRM_IOWR(0x1d, struct_drm_ctx_priv_map) # type: ignore
+DRM_IOCTL_SET_MASTER = DRM_IO(0x1e) # type: ignore # DRM_IO takes no argument type
+DRM_IOCTL_DROP_MASTER = DRM_IO(0x1f) # type: ignore
+DRM_IOCTL_ADD_CTX = DRM_IOWR(0x20, struct_drm_ctx) # type: ignore
+DRM_IOCTL_RM_CTX = DRM_IOWR(0x21, struct_drm_ctx) # type: ignore
+DRM_IOCTL_MOD_CTX = DRM_IOW( 0x22, struct_drm_ctx) # type: ignore
+DRM_IOCTL_GET_CTX = DRM_IOWR(0x23, struct_drm_ctx) # type: ignore
+DRM_IOCTL_SWITCH_CTX = DRM_IOW( 0x24, struct_drm_ctx) # type: ignore
+DRM_IOCTL_NEW_CTX = DRM_IOW( 0x25, struct_drm_ctx) # type: ignore
+DRM_IOCTL_RES_CTX = DRM_IOWR(0x26, struct_drm_ctx_res) # type: ignore
+DRM_IOCTL_ADD_DRAW = DRM_IOWR(0x27, struct_drm_draw) # type: ignore
+DRM_IOCTL_RM_DRAW = DRM_IOWR(0x28, struct_drm_draw) # type: ignore
+DRM_IOCTL_DMA = DRM_IOWR(0x29, struct_drm_dma) # type: ignore
+DRM_IOCTL_LOCK = DRM_IOW( 0x2a, struct_drm_lock) # type: ignore
+DRM_IOCTL_UNLOCK = DRM_IOW( 0x2b, struct_drm_lock) # type: ignore
+DRM_IOCTL_FINISH = DRM_IOW( 0x2c, struct_drm_lock) # type: ignore
+DRM_IOCTL_PRIME_HANDLE_TO_FD = DRM_IOWR(0x2d, struct_drm_prime_handle) # type: ignore
+DRM_IOCTL_PRIME_FD_TO_HANDLE = DRM_IOWR(0x2e, struct_drm_prime_handle) # type: ignore
+DRM_IOCTL_AGP_ACQUIRE = DRM_IO(  0x30) # type: ignore # number 0x2f is not assigned in this table
+DRM_IOCTL_AGP_RELEASE = DRM_IO(  0x31) # type: ignore
+DRM_IOCTL_AGP_ENABLE = DRM_IOW( 0x32, struct_drm_agp_mode) # type: ignore
+DRM_IOCTL_AGP_INFO = DRM_IOR( 0x33, struct_drm_agp_info) # type: ignore
+DRM_IOCTL_AGP_ALLOC = DRM_IOWR(0x34, struct_drm_agp_buffer) # type: ignore
+DRM_IOCTL_AGP_FREE = DRM_IOW( 0x35, struct_drm_agp_buffer) # type: ignore
+DRM_IOCTL_AGP_BIND = DRM_IOW( 0x36, struct_drm_agp_binding) # type: ignore
+DRM_IOCTL_AGP_UNBIND = DRM_IOW( 0x37, struct_drm_agp_binding) # type: ignore
+DRM_IOCTL_SG_ALLOC = DRM_IOWR(0x38, struct_drm_scatter_gather) # type: ignore
+DRM_IOCTL_SG_FREE = DRM_IOW( 0x39, struct_drm_scatter_gather) # type: ignore
+DRM_IOCTL_WAIT_VBLANK = DRM_IOWR(0x3a, union_drm_wait_vblank) # type: ignore
+DRM_IOCTL_CRTC_GET_SEQUENCE = DRM_IOWR(0x3b, struct_drm_crtc_get_sequence) # type: ignore
+DRM_IOCTL_CRTC_QUEUE_SEQUENCE = DRM_IOWR(0x3c, struct_drm_crtc_queue_sequence) # type: ignore
+DRM_IOCTL_UPDATE_DRAW = DRM_IOW(0x3f, struct_drm_update_draw) # type: ignore # numbers 0x3d and 0x3e are not assigned in this table
+DRM_IOCTL_SYNCOBJ_CREATE = DRM_IOWR(0xBF, struct_drm_syncobj_create) # type: ignore # numbering jumps from 0x3f to 0xBF, past the DRM_COMMAND_BASE..DRM_COMMAND_END range (0x40..0xA0) defined right after this table
+DRM_IOCTL_SYNCOBJ_DESTROY = DRM_IOWR(0xC0, struct_drm_syncobj_destroy) # type: ignore
+DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD = DRM_IOWR(0xC1, struct_drm_syncobj_handle) # type: ignore
+DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE = DRM_IOWR(0xC2, struct_drm_syncobj_handle) # type: ignore
+DRM_IOCTL_SYNCOBJ_WAIT = DRM_IOWR(0xC3, struct_drm_syncobj_wait) # type: ignore
+DRM_IOCTL_SYNCOBJ_RESET = DRM_IOWR(0xC4, struct_drm_syncobj_array) # type: ignore
+DRM_IOCTL_SYNCOBJ_SIGNAL = DRM_IOWR(0xC5, struct_drm_syncobj_array) # type: ignore
+DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT = DRM_IOWR(0xCA, struct_drm_syncobj_timeline_wait) # type: ignore # numbers 0xC6..0xC9 are not assigned in this table
+DRM_IOCTL_SYNCOBJ_QUERY = DRM_IOWR(0xCB, struct_drm_syncobj_timeline_array) # type: ignore
+DRM_IOCTL_SYNCOBJ_TRANSFER = DRM_IOWR(0xCC, struct_drm_syncobj_transfer) # type: ignore
+DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL = DRM_IOWR(0xCD, struct_drm_syncobj_timeline_array) # type: ignore
+DRM_IOCTL_SYNCOBJ_EVENTFD = DRM_IOWR(0xCF, struct_drm_syncobj_eventfd) # type: ignore # number 0xCE is not assigned in this table
+DRM_COMMAND_BASE = 0x40 # type: ignore # added to each DRM_AMDGPU_* command number when the DRM_IOCTL_AMDGPU_* values are built later in this file
+DRM_COMMAND_END = 0xA0 # type: ignore # not referenced elsewhere in this part of the file; presumably the exclusive upper bound of the driver command range -- confirm
+DRM_EVENT_VBLANK = 0x01 # type: ignore # DRM_EVENT_* are sequential ids 1..3
+DRM_EVENT_FLIP_COMPLETE = 0x02 # type: ignore
+DRM_EVENT_CRTC_SEQUENCE = 0x03 # type: ignore
+DRM_AMDGPU_GEM_CREATE = 0x00 # type: ignore # driver command numbers; each is offset by DRM_COMMAND_BASE in the DRM_IOCTL_AMDGPU_* definitions that follow
+DRM_AMDGPU_GEM_MMAP = 0x01 # type: ignore
+DRM_AMDGPU_CTX = 0x02 # type: ignore
+DRM_AMDGPU_BO_LIST = 0x03 # type: ignore
+DRM_AMDGPU_CS = 0x04 # type: ignore
+DRM_AMDGPU_INFO = 0x05 # type: ignore
+DRM_AMDGPU_GEM_METADATA = 0x06 # type: ignore
+DRM_AMDGPU_GEM_WAIT_IDLE = 0x07 # type: ignore
+DRM_AMDGPU_GEM_VA = 0x08 # type: ignore
+DRM_AMDGPU_WAIT_CS = 0x09 # type: ignore
+DRM_AMDGPU_GEM_OP = 0x10 # type: ignore # hex 0x10 == 16: numbers 0x0a..0x0f are not assigned in this list
+DRM_AMDGPU_GEM_USERPTR = 0x11 # type: ignore
+DRM_AMDGPU_WAIT_FENCES = 0x12 # type: ignore
+DRM_AMDGPU_VM = 0x13 # type: ignore
+DRM_AMDGPU_FENCE_TO_HANDLE = 0x14 # type: ignore
+DRM_AMDGPU_SCHED = 0x15 # type: ignore
+DRM_AMDGPU_USERQ = 0x16 # type: ignore
+DRM_AMDGPU_USERQ_SIGNAL = 0x17 # type: ignore
+DRM_AMDGPU_USERQ_WAIT = 0x18 # type: ignore
+DRM_AMDGPU_GEM_DGMA = 0x5c # type: ignore # the last two numbers sit far above the rest and are listed out of numeric order (0x5c before 0x5b)
+DRM_AMDGPU_SEM = 0x5b # type: ignore
+DRM_IOCTL_AMDGPU_GEM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union_drm_amdgpu_gem_create) # type: ignore # amdgpu ioctl table: number = DRM_COMMAND_BASE + command; union_drm_amdgpu_gem_create is declared outside this part of the file
+DRM_IOCTL_AMDGPU_GEM_MMAP = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union_drm_amdgpu_gem_mmap) # type: ignore
+DRM_IOCTL_AMDGPU_CTX = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union_drm_amdgpu_ctx) # type: ignore # union_drm_amdgpu_ctx is declared outside this part of the file
+DRM_IOCTL_AMDGPU_BO_LIST = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union_drm_amdgpu_bo_list) # type: ignore # union_drm_amdgpu_bo_list is declared outside this part of the file
+DRM_IOCTL_AMDGPU_CS = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union_drm_amdgpu_cs) # type: ignore
+DRM_IOCTL_AMDGPU_INFO = DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct_drm_amdgpu_info) # type: ignore # DRM_IOW, not DRM_IOWR (INFO, GEM_VA and SCHED are the three DRM_IOW entries in this table)
+DRM_IOCTL_AMDGPU_GEM_METADATA = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct_drm_amdgpu_gem_metadata) # type: ignore
+DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union_drm_amdgpu_gem_wait_idle) # type: ignore
+DRM_IOCTL_AMDGPU_GEM_VA = DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct_drm_amdgpu_gem_va) # type: ignore # DRM_IOW, not DRM_IOWR
+DRM_IOCTL_AMDGPU_WAIT_CS = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union_drm_amdgpu_wait_cs) # type: ignore
+DRM_IOCTL_AMDGPU_GEM_OP = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct_drm_amdgpu_gem_op) # type: ignore
+DRM_IOCTL_AMDGPU_GEM_USERPTR = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct_drm_amdgpu_gem_userptr) # type: ignore
+DRM_IOCTL_AMDGPU_WAIT_FENCES = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union_drm_amdgpu_wait_fences) # type: ignore
+DRM_IOCTL_AMDGPU_VM = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union_drm_amdgpu_vm) # type: ignore # union_drm_amdgpu_vm is declared outside this part of the file
+DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union_drm_amdgpu_fence_to_handle) # type: ignore
+DRM_IOCTL_AMDGPU_SCHED = DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union_drm_amdgpu_sched) # type: ignore # DRM_IOW, not DRM_IOWR; the union declares only an `_in` member
+DRM_IOCTL_AMDGPU_USERQ = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union_drm_amdgpu_userq) # type: ignore # the three userq argument types are declared outside this part of the file
+DRM_IOCTL_AMDGPU_USERQ_SIGNAL = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct_drm_amdgpu_userq_signal) # type: ignore
+DRM_IOCTL_AMDGPU_USERQ_WAIT = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct_drm_amdgpu_userq_wait) # type: ignore
+DRM_IOCTL_AMDGPU_GEM_DGMA = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct_drm_amdgpu_gem_dgma) # type: ignore # 0x40 + 0x5c = 0x9c
+DRM_IOCTL_AMDGPU_SEM = DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union_drm_amdgpu_sem) # type: ignore # 0x40 + 0x5b = 0x9b; union_drm_amdgpu_sem is declared outside this part of the file
+AMDGPU_GEM_DOMAIN_CPU = 0x1 # type: ignore # AMDGPU_GEM_DOMAIN_* are single-bit flags, OR-combined into AMDGPU_GEM_DOMAIN_MASK below
+AMDGPU_GEM_DOMAIN_GTT = 0x2 # type: ignore
+AMDGPU_GEM_DOMAIN_VRAM = 0x4 # type: ignore
+AMDGPU_GEM_DOMAIN_GDS = 0x8 # type: ignore
+AMDGPU_GEM_DOMAIN_GWS = 0x10 # type: ignore
+AMDGPU_GEM_DOMAIN_OA = 0x20 # type: ignore
+AMDGPU_GEM_DOMAIN_DOORBELL = 0x40 # type: ignore
+AMDGPU_GEM_DOMAIN_DGMA = 0x400 # type: ignore # bits 0x80, 0x100 and 0x200 are not assigned in this list
+AMDGPU_GEM_DOMAIN_DGMA_IMPORT = 0x800 # type: ignore
+AMDGPU_GEM_DOMAIN_MASK = (AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | AMDGPU_GEM_DOMAIN_DOORBELL | AMDGPU_GEM_DOMAIN_DGMA | AMDGPU_GEM_DOMAIN_DGMA_IMPORT) # type: ignore # OR of all nine domains above = 0xC7F
+AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED = (1 << 0) # type: ignore # AMDGPU_GEM_CREATE_* are single-bit flags
+AMDGPU_GEM_CREATE_NO_CPU_ACCESS = (1 << 1) # type: ignore
+AMDGPU_GEM_CREATE_CPU_GTT_USWC = (1 << 2) # type: ignore
+AMDGPU_GEM_CREATE_VRAM_CLEARED = (1 << 3) # type: ignore
+AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS = (1 << 5) # type: ignore # bit 4 is not assigned in this list
+AMDGPU_GEM_CREATE_VM_ALWAYS_VALID = (1 << 6) # type: ignore
+AMDGPU_GEM_CREATE_EXPLICIT_SYNC = (1 << 7) # type: ignore
+AMDGPU_GEM_CREATE_CP_MQD_GFX9 = (1 << 8) # type: ignore
+AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE = (1 << 9) # type: ignore
+AMDGPU_GEM_CREATE_ENCRYPTED = (1 << 10) # type: ignore
+AMDGPU_GEM_CREATE_PREEMPTIBLE = (1 << 11) # type: ignore
+AMDGPU_GEM_CREATE_DISCARDABLE = (1 << 12) # type: ignore
+AMDGPU_GEM_CREATE_COHERENT = (1 << 13) # type: ignore
+AMDGPU_GEM_CREATE_UNCACHED = (1 << 14) # type: ignore
+AMDGPU_GEM_CREATE_EXT_COHERENT = (1 << 15) # type: ignore
+AMDGPU_GEM_CREATE_GFX12_DCC = (1 << 16) # type: ignore
+AMDGPU_GEM_CREATE_SPARSE = (1 << 29) # type: ignore # bits 17..28 are not assigned in this list
+AMDGPU_GEM_CREATE_TOP_DOWN = (1 << 30) # type: ignore
+AMDGPU_GEM_CREATE_NO_EVICT = (1 << 31) # type: ignore # 0x80000000 as a Python int (positive; no 32-bit sign wrap)
+AMDGPU_BO_LIST_OP_CREATE = 0 # type: ignore # AMDGPU_BO_LIST_OP_* are sequential ids starting at 0
+AMDGPU_BO_LIST_OP_DESTROY = 1 # type: ignore
+AMDGPU_BO_LIST_OP_UPDATE = 2 # type: ignore
+AMDGPU_CTX_OP_ALLOC_CTX = 1 # type: ignore # AMDGPU_CTX_OP_* are sequential ids starting at 1 (no 0 is defined here)
+AMDGPU_CTX_OP_FREE_CTX = 2 # type: ignore
+AMDGPU_CTX_OP_QUERY_STATE = 3 # type: ignore
+AMDGPU_CTX_OP_QUERY_STATE2 = 4 # type: ignore
+AMDGPU_CTX_OP_GET_STABLE_PSTATE = 5 # type: ignore
+AMDGPU_CTX_OP_SET_STABLE_PSTATE = 6 # type: ignore
+AMDGPU_CTX_NO_RESET = 0 # type: ignore # reset-status ids 0..3
+AMDGPU_CTX_GUILTY_RESET = 1 # type: ignore
+AMDGPU_CTX_INNOCENT_RESET = 2 # type: ignore
+AMDGPU_CTX_UNKNOWN_RESET = 3 # type: ignore
+AMDGPU_CTX_QUERY2_FLAGS_RESET = (1<<0) # type: ignore # AMDGPU_CTX_QUERY2_FLAGS_* are single-bit flags, bits 0..5
+AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST = (1<<1) # type: ignore
+AMDGPU_CTX_QUERY2_FLAGS_GUILTY = (1<<2) # type: ignore
+AMDGPU_CTX_QUERY2_FLAGS_RAS_CE = (1<<3) # type: ignore
+AMDGPU_CTX_QUERY2_FLAGS_RAS_UE = (1<<4) # type: ignore
+AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS = (1<<5) # type: ignore
+AMDGPU_CTX_PRIORITY_UNSET = -2048 # type: ignore # priorities are signed values; UNSET lies below the VERY_LOW..VERY_HIGH range (-1023..1023)
+AMDGPU_CTX_PRIORITY_VERY_LOW = -1023 # type: ignore
+AMDGPU_CTX_PRIORITY_LOW = -512 # type: ignore
+AMDGPU_CTX_PRIORITY_NORMAL = 0 # type: ignore
+AMDGPU_CTX_PRIORITY_HIGH = 512 # type: ignore
+AMDGPU_CTX_PRIORITY_VERY_HIGH = 1023 # type: ignore
+AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK = 0xf # type: ignore # low 4 bits; wide enough for the pstate ids 0..4 listed next
+AMDGPU_CTX_STABLE_PSTATE_NONE = 0 # type: ignore
+AMDGPU_CTX_STABLE_PSTATE_STANDARD = 1 # type: ignore
+AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK = 2 # type: ignore
+AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK = 3 # type: ignore
+AMDGPU_CTX_STABLE_PSTATE_PEAK = 4 # type: ignore
+AMDGPU_USERQ_OP_CREATE = 1 # type: ignore
+AMDGPU_USERQ_OP_FREE = 2 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK = 0x3 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT = 0 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW = 0 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW = 1 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH = 2 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH = 3 # type: ignore
+AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE = (1 << 2) # type: ignore
+AMDGPU_SEM_OP_CREATE_SEM = 1 # type: ignore
+AMDGPU_SEM_OP_WAIT_SEM = 2 # type: ignore
+AMDGPU_SEM_OP_SIGNAL_SEM = 3 # type: ignore
+AMDGPU_SEM_OP_DESTROY_SEM = 4 # type: ignore
+AMDGPU_SEM_OP_IMPORT_SEM = 5 # type: ignore
+AMDGPU_SEM_OP_EXPORT_SEM = 6 # type: ignore
+AMDGPU_VM_OP_RESERVE_VMID = 1 # type: ignore
+AMDGPU_VM_OP_UNRESERVE_VMID = 2 # type: ignore
+AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE = 1 # type: ignore
+AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE = 2 # type: ignore
+AMDGPU_GEM_USERPTR_READONLY = (1 << 0) # type: ignore
+AMDGPU_GEM_USERPTR_ANONONLY = (1 << 1) # type: ignore
+AMDGPU_GEM_USERPTR_VALIDATE = (1 << 2) # type: ignore
+AMDGPU_GEM_USERPTR_REGISTER = (1 << 3) # type: ignore
+AMDGPU_GEM_DGMA_IMPORT = 0 # type: ignore
+AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR = 1 # type: ignore
+AMDGPU_TILING_ARRAY_MODE_SHIFT = 0 # type: ignore
+AMDGPU_TILING_ARRAY_MODE_MASK = 0xf # type: ignore
+AMDGPU_TILING_PIPE_CONFIG_SHIFT = 4 # type: ignore
+AMDGPU_TILING_PIPE_CONFIG_MASK = 0x1f # type: ignore
+AMDGPU_TILING_TILE_SPLIT_SHIFT = 9 # type: ignore
+AMDGPU_TILING_TILE_SPLIT_MASK = 0x7 # type: ignore
+AMDGPU_TILING_MICRO_TILE_MODE_SHIFT = 12 # type: ignore
+AMDGPU_TILING_MICRO_TILE_MODE_MASK = 0x7 # type: ignore
+AMDGPU_TILING_BANK_WIDTH_SHIFT = 15 # type: ignore
+AMDGPU_TILING_BANK_WIDTH_MASK = 0x3 # type: ignore
+AMDGPU_TILING_BANK_HEIGHT_SHIFT = 17 # type: ignore
+AMDGPU_TILING_BANK_HEIGHT_MASK = 0x3 # type: ignore
+AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT = 19 # type: ignore
+AMDGPU_TILING_MACRO_TILE_ASPECT_MASK = 0x3 # type: ignore
+AMDGPU_TILING_NUM_BANKS_SHIFT = 21 # type: ignore
+AMDGPU_TILING_NUM_BANKS_MASK = 0x3 # type: ignore
+AMDGPU_TILING_SWIZZLE_MODE_SHIFT = 0 # type: ignore
+AMDGPU_TILING_SWIZZLE_MODE_MASK = 0x1f # type: ignore
+AMDGPU_TILING_DCC_OFFSET_256B_SHIFT = 5 # type: ignore
+AMDGPU_TILING_DCC_OFFSET_256B_MASK = 0xFFFFFF # type: ignore
+AMDGPU_TILING_DCC_PITCH_MAX_SHIFT = 29 # type: ignore
+AMDGPU_TILING_DCC_PITCH_MAX_MASK = 0x3FFF # type: ignore
+AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT = 43 # type: ignore
+AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK = 0x1 # type: ignore
+AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT = 44 # type: ignore
+AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK = 0x1 # type: ignore
+AMDGPU_TILING_SCANOUT_SHIFT = 63 # type: ignore
+AMDGPU_TILING_SCANOUT_MASK = 0x1 # type: ignore
+AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT = 0 # type: ignore
+AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK = 0x7 # type: ignore
+AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT = 3 # type: ignore
+AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK = 0x3 # type: ignore
+AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT = 5 # type: ignore
+AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK = 0x7 # type: ignore
+AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT = 8 # type: ignore
+AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK = 0x3f # type: ignore
+AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT = 14 # type: ignore
+AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK = 0x1 # type: ignore
+AMDGPU_TILING_GFX12_SCANOUT_SHIFT = 63 # type: ignore
+AMDGPU_TILING_GFX12_SCANOUT_MASK = 0x1 # type: ignore
+AMDGPU_GEM_METADATA_OP_SET_METADATA = 1 # type: ignore
+AMDGPU_GEM_METADATA_OP_GET_METADATA = 2 # type: ignore
+AMDGPU_GEM_OP_GET_GEM_CREATE_INFO = 0 # type: ignore
+AMDGPU_GEM_OP_SET_PLACEMENT = 1 # type: ignore
+AMDGPU_VA_OP_MAP = 1 # type: ignore
+AMDGPU_VA_OP_UNMAP = 2 # type: ignore
+AMDGPU_VA_OP_CLEAR = 3 # type: ignore
+AMDGPU_VA_OP_REPLACE = 4 # type: ignore
+AMDGPU_VM_DELAY_UPDATE = (1 << 0) # type: ignore
+AMDGPU_VM_PAGE_READABLE = (1 << 1) # type: ignore
+AMDGPU_VM_PAGE_WRITEABLE = (1 << 2) # type: ignore
+AMDGPU_VM_PAGE_EXECUTABLE = (1 << 3) # type: ignore
+AMDGPU_VM_PAGE_PRT = (1 << 4) # type: ignore
+AMDGPU_VM_MTYPE_MASK = (0xf << 5) # type: ignore
+AMDGPU_VM_MTYPE_DEFAULT = (0 << 5) # type: ignore
+AMDGPU_VM_MTYPE_NC = (1 << 5) # type: ignore
+AMDGPU_VM_MTYPE_WC = (2 << 5) # type: ignore
+AMDGPU_VM_MTYPE_CC = (3 << 5) # type: ignore
+AMDGPU_VM_MTYPE_UC = (4 << 5) # type: ignore
+AMDGPU_VM_MTYPE_RW = (5 << 5) # type: ignore
+AMDGPU_VM_PAGE_NOALLOC = (1 << 9) # type: ignore
+AMDGPU_HW_IP_GFX = 0 # type: ignore
+AMDGPU_HW_IP_COMPUTE = 1 # type: ignore
+AMDGPU_HW_IP_DMA = 2 # type: ignore
+AMDGPU_HW_IP_UVD = 3 # type: ignore
+AMDGPU_HW_IP_VCE = 4 # type: ignore
+AMDGPU_HW_IP_UVD_ENC = 5 # type: ignore
+AMDGPU_HW_IP_VCN_DEC = 6 # type: ignore
+AMDGPU_HW_IP_VCN_ENC = 7 # type: ignore
+AMDGPU_HW_IP_VCN_JPEG = 8 # type: ignore
+AMDGPU_HW_IP_VPE = 9 # type: ignore
+AMDGPU_HW_IP_NUM = 10 # type: ignore
+AMDGPU_HW_IP_INSTANCE_MAX_COUNT = 1 # type: ignore
+AMDGPU_CHUNK_ID_IB = 0x01 # type: ignore
+AMDGPU_CHUNK_ID_FENCE = 0x02 # type: ignore
+AMDGPU_CHUNK_ID_DEPENDENCIES = 0x03 # type: ignore
+AMDGPU_CHUNK_ID_SYNCOBJ_IN = 0x04 # type: ignore
+AMDGPU_CHUNK_ID_SYNCOBJ_OUT = 0x05 # type: ignore
+AMDGPU_CHUNK_ID_BO_HANDLES = 0x06 # type: ignore
+AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES = 0x07 # type: ignore
+AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT = 0x08 # type: ignore
+AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL = 0x09 # type: ignore
+AMDGPU_CHUNK_ID_CP_GFX_SHADOW = 0x0a # type: ignore
+AMDGPU_IB_FLAG_CE = (1<<0) # type: ignore
+AMDGPU_IB_FLAG_PREAMBLE = (1<<1) # type: ignore
+AMDGPU_IB_FLAG_PREEMPT = (1<<2) # type: ignore
+AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE = (1 << 3) # type: ignore
+AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID = (1 << 4) # type: ignore
+AMDGPU_IB_FLAGS_SECURE = (1 << 5) # type: ignore
+AMDGPU_IB_FLAG_EMIT_MEM_SYNC = (1 << 6) # type: ignore
+AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ = 0 # type: ignore
+AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD = 1 # type: ignore
+AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD = 2 # type: ignore
+AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW = 0x1 # type: ignore
+AMDGPU_IDS_FLAGS_FUSION = 0x1 # type: ignore
+AMDGPU_IDS_FLAGS_PREEMPTION = 0x2 # type: ignore
+AMDGPU_IDS_FLAGS_TMZ = 0x4 # type: ignore
+AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD = 0x8 # type: ignore
+AMDGPU_IDS_FLAGS_MODE_MASK = 0x300 # type: ignore
+AMDGPU_IDS_FLAGS_MODE_SHIFT = 0x8 # type: ignore
+AMDGPU_IDS_FLAGS_MODE_PF = 0x0 # type: ignore
+AMDGPU_IDS_FLAGS_MODE_VF = 0x1 # type: ignore
+AMDGPU_IDS_FLAGS_MODE_PT = 0x2 # type: ignore
+AMDGPU_INFO_ACCEL_WORKING = 0x00 # type: ignore
+AMDGPU_INFO_CRTC_FROM_ID = 0x01 # type: ignore
+AMDGPU_INFO_HW_IP_INFO = 0x02 # type: ignore
+AMDGPU_INFO_HW_IP_COUNT = 0x03 # type: ignore
+AMDGPU_INFO_TIMESTAMP = 0x05 # type: ignore
+AMDGPU_INFO_FW_VERSION = 0x0e # type: ignore
+AMDGPU_INFO_FW_VCE = 0x1 # type: ignore
+AMDGPU_INFO_FW_UVD = 0x2 # type: ignore
+AMDGPU_INFO_FW_GMC = 0x03 # type: ignore
+AMDGPU_INFO_FW_GFX_ME = 0x04 # type: ignore
+AMDGPU_INFO_FW_GFX_PFP = 0x05 # type: ignore
+AMDGPU_INFO_FW_GFX_CE = 0x06 # type: ignore
+AMDGPU_INFO_FW_GFX_RLC = 0x07 # type: ignore
+AMDGPU_INFO_FW_GFX_MEC = 0x08 # type: ignore
+AMDGPU_INFO_FW_SMC = 0x0a # type: ignore
+AMDGPU_INFO_FW_SDMA = 0x0b # type: ignore
+AMDGPU_INFO_FW_SOS = 0x0c # type: ignore
+AMDGPU_INFO_FW_ASD = 0x0d # type: ignore
+AMDGPU_INFO_FW_VCN = 0x0e # type: ignore
+AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL = 0x0f # type: ignore
+AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM = 0x10 # type: ignore
+AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM = 0x11 # type: ignore
+AMDGPU_INFO_FW_DMCU = 0x12 # type: ignore
+AMDGPU_INFO_FW_TA = 0x13 # type: ignore
+AMDGPU_INFO_FW_DMCUB = 0x14 # type: ignore
+AMDGPU_INFO_FW_TOC = 0x15 # type: ignore
+AMDGPU_INFO_FW_CAP = 0x16 # type: ignore
+AMDGPU_INFO_FW_GFX_RLCP = 0x17 # type: ignore
+AMDGPU_INFO_FW_GFX_RLCV = 0x18 # type: ignore
+AMDGPU_INFO_FW_MES_KIQ = 0x19 # type: ignore
+AMDGPU_INFO_FW_MES = 0x1a # type: ignore
+AMDGPU_INFO_FW_IMU = 0x1b # type: ignore
+AMDGPU_INFO_FW_VPE = 0x1c # type: ignore
+AMDGPU_INFO_NUM_BYTES_MOVED = 0x0f # type: ignore
+AMDGPU_INFO_VRAM_USAGE = 0x10 # type: ignore
+AMDGPU_INFO_GTT_USAGE = 0x11 # type: ignore
+AMDGPU_INFO_GDS_CONFIG = 0x13 # type: ignore
+AMDGPU_INFO_VRAM_GTT = 0x14 # type: ignore
+AMDGPU_INFO_READ_MMR_REG = 0x15 # type: ignore
+AMDGPU_INFO_DEV_INFO = 0x16 # type: ignore
+AMDGPU_INFO_VIS_VRAM_USAGE = 0x17 # type: ignore
+AMDGPU_INFO_NUM_EVICTIONS = 0x18 # type: ignore
+AMDGPU_INFO_MEMORY = 0x19 # type: ignore
+AMDGPU_INFO_VCE_CLOCK_TABLE = 0x1A # type: ignore
+AMDGPU_INFO_VBIOS = 0x1B # type: ignore
+AMDGPU_INFO_VBIOS_SIZE = 0x1 # type: ignore
+AMDGPU_INFO_VBIOS_IMAGE = 0x2 # type: ignore
+AMDGPU_INFO_VBIOS_INFO = 0x3 # type: ignore
+AMDGPU_INFO_NUM_HANDLES = 0x1C # type: ignore
+AMDGPU_INFO_SENSOR = 0x1D # type: ignore
+AMDGPU_INFO_SENSOR_GFX_SCLK = 0x1 # type: ignore
+AMDGPU_INFO_SENSOR_GFX_MCLK = 0x2 # type: ignore
+AMDGPU_INFO_SENSOR_GPU_TEMP = 0x3 # type: ignore
+AMDGPU_INFO_SENSOR_GPU_LOAD = 0x4 # type: ignore
+AMDGPU_INFO_SENSOR_GPU_AVG_POWER = 0x5 # type: ignore
+AMDGPU_INFO_SENSOR_VDDNB = 0x6 # type: ignore
+AMDGPU_INFO_SENSOR_VDDGFX = 0x7 # type: ignore
+AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK = 0x8 # type: ignore
+AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK = 0x9 # type: ignore
+AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK = 0xa # type: ignore
+AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK = 0xb # type: ignore
+AMDGPU_INFO_SENSOR_GPU_INPUT_POWER = 0xc # type: ignore
+AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS = 0x1E # type: ignore
+AMDGPU_INFO_VRAM_LOST_COUNTER = 0x1F # type: ignore
+AMDGPU_INFO_RAS_ENABLED_FEATURES = 0x20 # type: ignore
+AMDGPU_INFO_RAS_ENABLED_UMC = (1 << 0) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_SDMA = (1 << 1) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_GFX = (1 << 2) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_MMHUB = (1 << 3) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_ATHUB = (1 << 4) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_PCIE = (1 << 5) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_HDP = (1 << 6) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_XGMI = (1 << 7) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_DF = (1 << 8) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_SMN = (1 << 9) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_SEM = (1 << 10) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_MP0 = (1 << 11) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_MP1 = (1 << 12) # type: ignore
+AMDGPU_INFO_RAS_ENABLED_FUSE = (1 << 13) # type: ignore
+AMDGPU_INFO_VIDEO_CAPS = 0x21 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_DECODE = 0 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_ENCODE = 1 # type: ignore
+AMDGPU_INFO_MAX_IBS = 0x22 # type: ignore
+AMDGPU_INFO_GPUVM_FAULT = 0x23 # type: ignore
+AMDGPU_INFO_UQ_FW_AREAS = 0x24 # type: ignore
+AMDGPU_INFO_CAPABILITY = 0x50 # type: ignore
+AMDGPU_INFO_VIRTUAL_RANGE = 0x51 # type: ignore
+AMDGPU_CAPABILITY_PIN_MEM_FLAG = (1 << 0) # type: ignore
+AMDGPU_CAPABILITY_DIRECT_GMA_FLAG = (1 << 1) # type: ignore
+AMDGPU_INFO_MMR_SE_INDEX_SHIFT = 0 # type: ignore
+AMDGPU_INFO_MMR_SE_INDEX_MASK = 0xff # type: ignore
+AMDGPU_INFO_MMR_SH_INDEX_SHIFT = 8 # type: ignore
+AMDGPU_INFO_MMR_SH_INDEX_MASK = 0xff # type: ignore
+AMDGPU_VRAM_TYPE_UNKNOWN = 0 # type: ignore
+AMDGPU_VRAM_TYPE_GDDR1 = 1 # type: ignore
+AMDGPU_VRAM_TYPE_DDR2 = 2 # type: ignore
+AMDGPU_VRAM_TYPE_GDDR3 = 3 # type: ignore
+AMDGPU_VRAM_TYPE_GDDR4 = 4 # type: ignore
+AMDGPU_VRAM_TYPE_GDDR5 = 5 # type: ignore
+AMDGPU_VRAM_TYPE_HBM = 6 # type: ignore
+AMDGPU_VRAM_TYPE_DDR3 = 7 # type: ignore
+AMDGPU_VRAM_TYPE_DDR4 = 8 # type: ignore
+AMDGPU_VRAM_TYPE_GDDR6 = 9 # type: ignore
+AMDGPU_VRAM_TYPE_DDR5 = 10 # type: ignore
+AMDGPU_VRAM_TYPE_LPDDR4 = 11 # type: ignore
+AMDGPU_VRAM_TYPE_LPDDR5 = 12 # type: ignore
+AMDGPU_VRAM_TYPE_HBM3E = 13 # type: ignore
+AMDGPU_VRAM_TYPE_HBM_WIDTH = 4096 # type: ignore
+AMDGPU_VCE_CLOCK_TABLE_ENTRIES = 6 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 = 0 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 = 1 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 = 2 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC = 3 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC = 4 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG = 5 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 = 6 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 = 7 # type: ignore
+AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT = 8 # type: ignore
+AMDGPU_VMHUB_TYPE_MASK = 0xff # type: ignore
+AMDGPU_VMHUB_TYPE_SHIFT = 0 # type: ignore
+AMDGPU_VMHUB_TYPE_GFX = 0 # type: ignore
+AMDGPU_VMHUB_TYPE_MM0 = 1 # type: ignore
+AMDGPU_VMHUB_TYPE_MM1 = 2 # type: ignore
+AMDGPU_VMHUB_IDX_MASK = 0xff00 # type: ignore
+AMDGPU_VMHUB_IDX_SHIFT = 8 # type: ignore
+AMDGPU_FAMILY_UNKNOWN = 0 # type: ignore
+AMDGPU_FAMILY_SI = 110 # type: ignore
+AMDGPU_FAMILY_CI = 120 # type: ignore
+AMDGPU_FAMILY_KV = 125 # type: ignore
+AMDGPU_FAMILY_VI = 130 # type: ignore
+AMDGPU_FAMILY_CZ = 135 # type: ignore
+AMDGPU_FAMILY_AI = 141 # type: ignore
+AMDGPU_FAMILY_RV = 142 # type: ignore
+AMDGPU_FAMILY_NV = 143 # type: ignore
+AMDGPU_FAMILY_VGH = 144 # type: ignore
+AMDGPU_FAMILY_GC_11_0_0 = 145 # type: ignore
+AMDGPU_FAMILY_YC = 146 # type: ignore
+AMDGPU_FAMILY_GC_11_0_1 = 148 # type: ignore
+AMDGPU_FAMILY_GC_10_3_6 = 149 # type: ignore
+AMDGPU_FAMILY_GC_10_3_7 = 151 # type: ignore
+AMDGPU_FAMILY_GC_11_5_0 = 150 # type: ignore
+AMDGPU_FAMILY_GC_12_0_0 = 152 # type: ignore
+AMDGPU_SUA_APERTURE_PRIVATE = 1 # type: ignore
+AMDGPU_SUA_APERTURE_SHARED = 2 # type: ignore
+AMDGPU_FREESYNC_FULLSCREEN_ENTER = 1 # type: ignore
+AMDGPU_FREESYNC_FULLSCREEN_EXIT = 2 # type: ignore
\ No newline at end of file
diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py
index c8cb3d0299..b58689f61e 100644
--- a/tinygrad/runtime/ops_amd.py
+++ b/tinygrad/runtime/ops_amd.py
@@ -11,7 +11,7 @@ from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, Profil
 from tinygrad.helpers import VIZ, AMD_CC, AMD_LLVM, ceildiv
 from tinygrad.renderer.cstyle import AMDHIPRenderer, AMDHIPCCRenderer
 from tinygrad.renderer.llvmir import AMDLLVMRenderer
-from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd
+from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd, amdgpu_drm
 from tinygrad.runtime.autogen.am import am
 from tinygrad.runtime.support.elf import elf_loader
 from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager
@@ -181,11 +181,13 @@ class AMDComputeQueue(HWQueue):
       for xcc in range(s.xcc):
         with self.pred_exec(xcc_mask=1 << xcc):
           for inst, se_idx, sa_idx, wgp_idx in itertools.product(range(s.inst), range(s.se), range(s.sa), range(s.wgp)):
+            loff = next(offset)
+            if s.wgp > 1 and not self.dev.iface.is_wgp_active(xcc, se_idx, sa_idx, wgp_idx): continue
             self.set_grbm(**({'instance':inst} if s.inst > 1 else ({'se':se_idx}|({'sh':sa_idx, 'wgp':wgp_idx} if self.dev.target[0] != 9 else {}))))
 
             # Copy counter to memory (src_sel = perf, dst_sel = tc_l2)
             lo, hi = getattr(self.gc, f'{s.regsample}_LO'), getattr(self.gc, f'{s.regsample}_HI', None)
-            self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+(loff:=next(offset))))
+            self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+loff))
             if hi is not None: self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, hi.addr[0], 0, *data64_le(buf.va_addr+loff+4))
 
     return self.pmc_reset_counters(en=True)
@@ -806,6 +808,13 @@ class KFDIface:
       else:
         raise RuntimeError("PMC/SQTT requires stable power state: run `amd-smi set -l stable_std` for KFD iface")
 
+  @functools.cached_property
+  def drm_dev_info(self) -> amdgpu_drm.struct_drm_amdgpu_info_device: # AMDGPU_INFO_DEV_INFO query result, cached per instance
+    amdgpu_drm.DRM_IOCTL_AMDGPU_INFO(self.drm_fd, query=amdgpu_drm.AMDGPU_INFO_DEV_INFO, # presumably fills inf in place (TODO confirm)
+      return_pointer=ctypes.addressof(inf:=amdgpu_drm.struct_drm_amdgpu_info_device()), return_size=ctypes.sizeof(inf))
+    return inf
+  def is_wgp_active(self, xcc, se, sa, wgp) -> bool: return ((self.drm_dev_info.cu_bitmap[se % 4][sa + (se // 4) * 2] >> (2 * wgp)) & 0x3) == 0x3
+
 class PCIIface(PCIIfaceBase):
   gpus:ClassVar[list[str]] = []
 
@@ -816,6 +825,7 @@ class PCIIface(PCIIfaceBase):
     self.pci_dev.write_config(pci.PCI_COMMAND, self.pci_dev.read_config(pci.PCI_COMMAND, 2) | pci.PCI_COMMAND_MASTER, 2)
 
   def require_profile_mode(self): return True
+  def is_wgp_active(self, xcc, se, sa, wgp) -> bool: return True # TODO: detect disabled WGPs on some asics; all are reported active for now
 
   def _setup_adev(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None):
     self.dev_impl:AMDev = AMDev(pci_dev, dma_regions)