diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 178333c974..8ef7c8eefb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -727,6 +727,8 @@ jobs: - name: Run pytest (cuda) # skip multitensor because it's slow run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --ignore test/test_multitensor.py --durations=20 + - name: Run TestOps.test_add with PMA + run: VIZ=1 PMA=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add - name: Run process replay tests uses: ./.github/actions/process-replay diff --git a/test/mockgpu/nv/nvdriver.py b/test/mockgpu/nv/nvdriver.py index f843923a4a..6bd788719d 100644 --- a/test/mockgpu/nv/nvdriver.py +++ b/test/mockgpu/nv/nvdriver.py @@ -19,6 +19,7 @@ NVAllocation = collections.namedtuple('NVAllocation', ['device', 'size', 'is_sig NVChannelGroup = collections.namedtuple('NVChannelGroup', ['device']) NVContextShare = collections.namedtuple('NVContextShare', ['channel_group']) NVGPFIFO = collections.namedtuple('NVGPFIFO', ['device', 'token']) +NVProfiler = collections.namedtuple('NVProfiler', ['subdevice']) class NVCtlFileDesc(VirtFileDesc): def __init__(self, fd, driver): @@ -140,6 +141,10 @@ class NVDriver(VirtDriver): struct.hObjectNew = self._alloc_handle() elif struct.hClass == nv_gpu.GT200_DEBUGGER: struct.hObjectNew = self._alloc_handle() + elif struct.hClass == nv_gpu.MAXWELL_PROFILER_DEVICE: + assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVSubDevice) + struct.hObjectNew = self._alloc_handle() + self.object_by_handle[struct.hObjectNew] = NVProfiler(self.object_by_handle[struct.hObjectParent]) else: raise RuntimeError(f"Unknown {struct.hClass} to rm_alloc") return 0 @@ -204,6 +209,14 @@ class NVDriver(VirtDriver): elif struct.cmd == nv_gpu.NV0000_CTRL_CMD_SYSTEM_GET_BUILD_VERSION_V2: params = nv_gpu.NV0000_CTRL_SYSTEM_GET_BUILD_VERSION_V2_PARAMS.from_address(params_ptr) 
params.driverVersionBuffer = b"570.00.00\0" + elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_GR_GET_TPC_MASK: + params = nv_gpu.NV2080_CTRL_GR_GET_TPC_MASK_PARAMS.from_address(params_ptr) + params.tpcMask = 0x1 # one TPC + # Profiler commands - just pass through for mockgpu + elif struct.cmd in (nv_gpu.NVB0CC_CTRL_CMD_POWER_REQUEST_FEATURES, nv_gpu.NVB0CC_CTRL_CMD_ALLOC_PMA_STREAM, + nv_gpu.NVB0CC_CTRL_CMD_RESERVE_HWPM_LEGACY, nv_gpu.NVB0CC_CTRL_CMD_RESERVE_PM_AREA_PC_SAMPLER, + nv_gpu.NVB0CC_CTRL_CMD_BIND_PM_RESOURCES, nv_gpu.NVB0CC_CTRL_CMD_SET_HS_CREDITS, + nv_gpu.NVB0CC_CTRL_CMD_EXEC_REG_OPS, nv_gpu.NVB0CC_CTRL_CMD_PMA_STREAM_UPDATE_GET_PUT): pass else: raise RuntimeError(f"Unknown {struct.cmd} to rm_control") return 0 @@ -213,14 +226,11 @@ class NVDriver(VirtDriver): elif nr == nv_gpu.NV_ESC_RM_CONTROL: return self.rm_control(argp) elif nr == nv_gpu.NV_ESC_RM_MAP_MEMORY: st:Any = nv_gpu.nv_ioctl_nvos33_parameters_with_fd.from_address(argp) - obj = self.object_by_handle[st.params.hMemory] - if isinstance(obj, NVUserMode): - file = self.opened_fds[st.fd] - assert isinstance(file, NVDevFileDesc) + obj = self.object_by_handle.get(st.params.hMemory) + file = self.opened_fds.get(st.fd) + if isinstance(obj, NVUserMode) and isinstance(file, NVDevFileDesc): file._mapping_userland = True - elif isinstance(obj, NVAllocation) and obj.is_signal: - file = self.opened_fds[st.fd] - assert isinstance(file, NVDevFileDesc) + elif isinstance(obj, NVAllocation) and obj.is_signal and isinstance(file, NVDevFileDesc): file._mapping_signal = True elif nr == nv_gpu.NV_ESC_RM_FREE: st = nv_gpu.NVOS00_PARAMETERS.from_address(argp) diff --git a/tinygrad/runtime/autogen/__init__.py b/tinygrad/runtime/autogen/__init__.py index 5c5174b335..fe4aa885bd 100644 --- a/tinygrad/runtime/autogen/__init__.py +++ b/tinygrad/runtime/autogen/__init__.py @@ -45,13 +45,13 @@ def __getattr__(nm): return load(nm, None, [ *[root/"extra/nv_gpu_driver"/s for s in ["clc9b0.h", "clc6c0qmd.h","clcec0qmd.h", 
"nvdec_drv.h"]], "{}/kernel-open/common/inc/nvmisc.h", *[f"{{}}/src/common/sdk/nvidia/inc/class/cl{s}.h" for s in ["0000", "0070", "0080", "2080", "2080_notification", "c56f", "c86f", "c96f", "c761", - "83de", "c6c0", "cdc0"]], + "83de", "b2cc", "c6c0", "cdc0"]], *[f"{{}}/kernel-open/nvidia-uvm/{s}.h" for s in ["clc6b5", "clc9b5", "clcfb0", "uvm_ioctl", "uvm_linux_ioctl", "hwref/ampere/ga100/dev_fault"]], *[f"{{}}/src/nvidia/arch/nvalloc/unix/include/nv{s}.h" for s in ["_escape", "-ioctl", "-ioctl-numbers", "-ioctl-numa", "-unix-nvos-params-wrappers"]], *[f"{{}}/src/common/sdk/nvidia/inc/{s}.h" for s in ["alloc/alloc_channel", "nvos", "ctrl/ctrlc36f", "ctrl/ctrlcb33", "ctrl/ctrla06c", "ctrl/ctrl90f1", "ctrl/ctrla06f/ctrla06fgpfifo"]], - *[f"{{}}/src/common/sdk/nvidia/inc/ctrl/ctrl{s}/*.h" for s in ["0000", "0080", "2080", "83de"]], + *[f"{{}}/src/common/sdk/nvidia/inc/ctrl/ctrl{s}/*.h" for s in ["0000", "0080", "2080", "83de", "b0cc"]], "{}/kernel-open/common/inc/nvstatus.h", "{}/src/nvidia/generated/g_allclasses.h" ], args=[ "-include", "{}/src/common/sdk/nvidia/inc/nvtypes.h", "-I{}/src/common/inc", "-I{}/kernel-open/nvidia-uvm", "-I{}/kernel-open/common/inc", diff --git a/tinygrad/runtime/autogen/nv_570.py b/tinygrad/runtime/autogen/nv_570.py index 76fd42bf16..386b0179c2 100644 --- a/tinygrad/runtime/autogen/nv_570.py +++ b/tinygrad/runtime/autogen/nv_570.py @@ -1600,6 +1600,12 @@ class struct_NV83DE_ALLOC_PARAMETERS(c.Struct): hClass3dObject: Annotated[NvHandle, 8] NV83DE_ALLOC_PARAMETERS: TypeAlias = struct_NV83DE_ALLOC_PARAMETERS @c.record +class struct_NVB2CC_ALLOC_PARAMETERS(c.Struct): + SIZE = 8 + hClientTarget: Annotated[NvHandle, 0] + hContextTarget: Annotated[NvHandle, 4] +NVB2CC_ALLOC_PARAMETERS: TypeAlias = struct_NVB2CC_ALLOC_PARAMETERS +@c.record class struct__clc9b5_tag0(c.Struct): SIZE = 8192 Reserved00: Annotated[c.Array[NvV32, Literal[64]], 0] @@ -12060,6 +12066,239 @@ class struct_NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS(c.Struct): SIZE 
= 4 value: Annotated[NvU32, 0] NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS: TypeAlias = struct_NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS +@c.record +class struct_NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS(c.Struct): + SIZE = 56 + params: Annotated[NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS, 0] +@c.record +class struct_NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS(c.Struct): + SIZE = 56 + hMemPmaBuffer: Annotated[NvHandle, 0] + pmaBufferOffset: Annotated[NvU64, 8] + pmaBufferSize: Annotated[NvU64, 16] + hMemPmaBytesAvailable: Annotated[NvHandle, 24] + pmaBytesAvailableOffset: Annotated[NvU64, 32] + ctxsw: Annotated[NvBool, 40] + pmaChannelIdx: Annotated[NvU32, 44] + pmaBufferVA: Annotated[NvU64, 48] +NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS +NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS: TypeAlias = struct_NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS(c.Struct): + SIZE = 8 + pmaChannelIdx: Annotated[NvU32, 0] + bMembytesPollingRequired: Annotated[NvBool, 4] +NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS(c.Struct): + SIZE = 48 + pmaChannelIdx: Annotated[NvU32, 0] + pmaBufferVA: Annotated[NvU64, 8] + pmaBufferSize: Annotated[NvU64, 16] + membytesVA: Annotated[NvU64, 24] + hwpmIBPA: Annotated[NvU64, 32] + hwpmIBAperture: Annotated[NvU8, 40] +NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS(c.Struct): + SIZE = 5 + bAdminProfilingPermitted: Annotated[NvBool, 0] + bDevProfilingPermitted: Annotated[NvBool, 1] + bCtxProfilingPermitted: Annotated[NvBool, 2] + bVideoMemoryProfilingPermitted: Annotated[NvBool, 3] + bSysMemoryProfilingPermitted: 
Annotated[NvBool, 4] +NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS(c.Struct): + SIZE = 64 + hMemPmaBuffer: Annotated[NvHandle, 0] + pmaBufferOffset: Annotated[NvU64, 8] + pmaBufferSize: Annotated[NvU64, 16] + hMemPmaBytesAvailable: Annotated[NvHandle, 24] + pmaBytesAvailableOffset: Annotated[NvU64, 32] + ctxsw: Annotated[NvBool, 40] + pmaChannelIdx: Annotated[NvU32, 44] + pmaBufferVA: Annotated[NvU64, 48] + bInputPmaChIdx: Annotated[NvBool, 56] +NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS(c.Struct): + SIZE = 4 + pmaChannelIdx: Annotated[NvU32, 0] +NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS(c.Struct): + SIZE = 4 + maxPmaChannels: Annotated[NvU32, 0] +NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS +@c.record +class struct_NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS(c.Struct): + SIZE = 12 + globalStatus: Annotated[NvU32, 0] + controlMask: Annotated[NvU32, 4] + statusMask: Annotated[NvU32, 8] +NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS +@c.record +class struct_NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS(c.Struct): + SIZE = 4 + controlMask: Annotated[NvU32, 0] +NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS +@c.record +class 
struct_NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS +@c.record +class struct_NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS +@c.record +class struct_NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS(c.Struct): + SIZE = 4 + pmaChannelIdx: Annotated[NvU32, 0] +NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS(c.Struct): + SIZE = 48 + bytesConsumed: Annotated[NvU64, 0] + bUpdateAvailableBytes: Annotated[NvBool, 8] + bWait: Annotated[NvBool, 9] + bytesAvailable: Annotated[NvU64, 16] + bReturnPut: Annotated[NvBool, 24] + putPtr: Annotated[NvU64, 32] + pmaChannelIdx: Annotated[NvU32, 40] + bOverflowStatus: Annotated[NvBool, 44] +NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS +NVB0CC_GPU_REG_OP: TypeAlias = struct_NV2080_CTRL_GPU_REG_OP +class enum_NVB0CC_REGOPS_MODE(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_REGOPS_MODE_ALL_OR_NONE = enum_NVB0CC_REGOPS_MODE.define('NVB0CC_REGOPS_MODE_ALL_OR_NONE', 0) +NVB0CC_REGOPS_MODE_CONTINUE_ON_ERROR = enum_NVB0CC_REGOPS_MODE.define('NVB0CC_REGOPS_MODE_CONTINUE_ON_ERROR', 1) + +NVB0CC_REGOPS_MODE: TypeAlias = enum_NVB0CC_REGOPS_MODE +@c.record +class struct_NVB0CC_CTRL_EXEC_REG_OPS_PARAMS(c.Struct): + SIZE = 3980 + regOpCount: Annotated[NvU32, 0] + mode: Annotated[NVB0CC_REGOPS_MODE, 4] + bPassed: Annotated[NvBool, 8] + bDirect: Annotated[NvBool, 9] + regOps: Annotated[c.Array[NVB0CC_GPU_REG_OP, Literal[124]], 12] +NVB0CC_CTRL_EXEC_REG_OPS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_EXEC_REG_OPS_PARAMS +@c.record +class struct_NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS(c.Struct): + SIZE = 
4 + numCredits: Annotated[NvU32, 0] +NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS +class enum_NVB0CC_CHIPLET_TYPE(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_CHIPLET_TYPE_INVALID = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_INVALID', 0) +NVB0CC_CHIPLET_TYPE_FBP = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_FBP', 1) +NVB0CC_CHIPLET_TYPE_GPC = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_GPC', 2) +NVB0CC_CHIPLET_TYPE_SYS = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_SYS', 3) + +NVB0CC_CHIPLET_TYPE: TypeAlias = enum_NVB0CC_CHIPLET_TYPE +class enum_NVB0CC_HS_CREDITS_CMD_STATUS(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_HS_CREDITS_CMD_STATUS_OK = enum_NVB0CC_HS_CREDITS_CMD_STATUS.define('NVB0CC_HS_CREDITS_CMD_STATUS_OK', 0) +NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CREDITS = enum_NVB0CC_HS_CREDITS_CMD_STATUS.define('NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CREDITS', 1) +NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CHIPLET = enum_NVB0CC_HS_CREDITS_CMD_STATUS.define('NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CHIPLET', 2) + +NVB0CC_HS_CREDITS_CMD_STATUS: TypeAlias = enum_NVB0CC_HS_CREDITS_CMD_STATUS +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO(c.Struct): + SIZE = 4 + chipletType: Annotated[NvU8, 0] + chipletIndex: Annotated[NvU8, 1] + numCredits: Annotated[NvU16, 2] +NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS(c.Struct): + SIZE = 2 + status: Annotated[NvU8, 0] + entryIndex: Annotated[NvU8, 1] +NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS +@c.record +class struct_NVB0CC_CTRL_HS_CREDITS_PARAMS(c.Struct): + SIZE = 256 + pmaChannelIdx: Annotated[NvU8, 0] + numEntries: Annotated[NvU8, 1] + statusInfo: Annotated[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS, 2] + creditInfo: 
Annotated[c.Array[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO, Literal[63]], 4] +NVB0CC_CTRL_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_HS_CREDITS_PARAMS +NVB0CC_CTRL_SET_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_HS_CREDITS_PARAMS +NVB0CC_CTRL_GET_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_HS_CREDITS_PARAMS +class enum_NVB0CC_CTRL_HES_TYPE(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_CTRL_HES_INVALID = enum_NVB0CC_CTRL_HES_TYPE.define('NVB0CC_CTRL_HES_INVALID', 0) +NVB0CC_CTRL_HES_CWD = enum_NVB0CC_CTRL_HES_TYPE.define('NVB0CC_CTRL_HES_CWD', 1) + +NVB0CC_CTRL_HES_TYPE: TypeAlias = enum_NVB0CC_CTRL_HES_TYPE +@c.record +class struct_NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS +@c.record +class union_NVB0CC_CTRL_HES_RESERVATION_UNION(c.Struct): + SIZE = 1 + cwd: Annotated[NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS, 0] +NVB0CC_CTRL_HES_RESERVATION_UNION: TypeAlias = union_NVB0CC_CTRL_HES_RESERVATION_UNION +@c.record +class struct_NVB0CC_CTRL_RESERVE_HES_PARAMS(c.Struct): + SIZE = 8 + type: Annotated[NvU32, 0] + reserveParams: Annotated[NVB0CC_CTRL_HES_RESERVATION_UNION, 4] +NVB0CC_CTRL_RESERVE_HES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_HES_PARAMS +@c.record +class struct_NVB0CC_CTRL_RELEASE_HES_PARAMS(c.Struct): + SIZE = 4 + type: Annotated[NVB0CC_CTRL_HES_TYPE, 0] +NVB0CC_CTRL_RELEASE_HES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RELEASE_HES_PARAMS +@c.record +class struct_NVB0CC_CTRL_CREDIT_POOL_INFO(c.Struct): + SIZE = 4 + numCredits: Annotated[NvU16, 0] + poolIndex: Annotated[NvU8, 2] + chipletType: Annotated[NvU8, 3] +NVB0CC_CTRL_CREDIT_POOL_INFO: TypeAlias = struct_NVB0CC_CTRL_CREDIT_POOL_INFO +@c.record +class struct_NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL(c.Struct): + SIZE = 124 + poolInfos: Annotated[c.Array[NVB0CC_CTRL_CREDIT_POOL_INFO, Literal[30]], 0] + poolInfosCount: Annotated[NvU32, 120] 
+NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL: TypeAlias = struct_NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO(c.Struct): + SIZE = 3 + chipletType: Annotated[NvU8, 0] + chipletIndex: Annotated[NvU8, 1] + poolIndex: Annotated[NvU8, 2] +NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO +@c.record +class struct_NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS(c.Struct): + SIZE = 194 + numQueries: Annotated[NvU16, 0] + statusInfo: Annotated[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS, 2] + queries: Annotated[c.Array[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO, Literal[63]], 4] +NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS: TypeAlias = struct_NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS +@c.record +class struct_NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS(c.Struct): + SIZE = 1 + disable: Annotated[NvBool, 0] +NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS: TypeAlias = struct_NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS +@c.record +class struct_NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS(c.Struct): + SIZE = 1 + enabled: Annotated[NvBool, 0] +NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS +@c.record +class struct_NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS class nv_status_codes(Annotated[int, ctypes.c_uint32], c.Enum): pass NV_OK = nv_status_codes.define('NV_OK', 0) NV_ERR_GENERIC = nv_status_codes.define('NV_ERR_GENERIC', 65535) @@ -14889,6 +15128,8 @@ NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE = 0x00000004 # type: ignore BLACKWELL_USERMODE_A = (0xc761) # type: ignore GT200_DEBUGGER = (0x83de) # type: ignore NV83DE_ALLOC_PARAMETERS_MESSAGE_ID = (0x83de) # type: ignore +MAXWELL_PROFILER_DEVICE = (0xb2cc) # type: ignore 
+NVB2CC_ALLOC_PARAMETERS_MESSAGE_ID = (0xb2cc) # type: ignore AMPERE_COMPUTE_A = 0xC6C0 # type: ignore NVC6C0_SET_OBJECT = 0x0000 # type: ignore NVC6C0_NO_OPERATION = 0x0100 # type: ignore @@ -22175,6 +22416,132 @@ NV83DE_CTRL_CMD_DEBUG_GET_MODE_MMU_GCC_DEBUG = (0x83de032b) # type: ignore NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS_MESSAGE_ID = (0x2B) # type: ignore NV83DE_CTRL_CMD_DEBUG_GET_MODE_MMU_GCC_DEBUG_ENABLED = (0x00000001) # type: ignore NV83DE_CTRL_CMD_DEBUG_GET_MODE_MMU_GCC_DEBUG_DISABLED = (0x00000002) # type: ignore +NVB0CC_CTRL_RESERVED = (0x00) # type: ignore +NVB0CC_CTRL_PROFILER = (0x01) # type: ignore +NVB0CC_CTRL_INTERNAL = (0x02) # type: ignore +NVB0CC_CTRL_CMD_NULL = (0xb0cc0000) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM = (0xb0cc0204) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS_MESSAGE_ID = (0x0) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_QUIESCE_PMA_CHANNEL = (0xb0cc0201) # type: ignore +NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS_MESSAGE_ID = (0x1) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_SRIOV_PROMOTE_PMA_STREAM = (0xb0cc0202) # type: ignore +NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS_MESSAGE_ID = (0x2) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_PERMISSIONS_INIT = (0xb0cc0203) # type: ignore +NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS_MESSAGE_ID = (0x3) # type: ignore +NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS_MESSAGE_ID = (0x4) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_FREE_PMA_STREAM = (0xb0cc0206) # type: ignore +NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS_MESSAGE_ID = (0x6) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_GET_MAX_PMAS = (0xb0cc0207) # type: ignore +NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS_MESSAGE_ID = (0x7) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_BIND_PM_RESOURCES = (0xb0cc0208) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_UNBIND_PM_RESOURCES = (0xb0cc0209) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_RESERVE_HWPM_LEGACY = (0xb0cc020a) # type: ignore 
+NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS_MESSAGE_ID = (0xa) # type: ignore +NVB0CC_CTRL_CMD_POWER_REQUEST_FEATURES = (0xb0cc0301) # type: ignore +NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS_MESSAGE_ID = (0x1) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_IGNORE = (0x00000000) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_DISABLE = (0x00000001) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_ENABLE = (0x00000002) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore 
+NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED = (0x00000000) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED = (0x00000001) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED = (0x00000002) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED = (0x00000003) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore 
+NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_CMD_POWER_RELEASE_FEATURES = (0xb0cc0302) # type: ignore +NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS_MESSAGE_ID = (0x2) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_RELEASE = (0x00000003) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # 
type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_CMD_RESERVE_HWPM_LEGACY = (0xb0cc0101) # type: ignore +NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS_MESSAGE_ID = (0x1) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_HWPM_LEGACY = (0xb0cc0102) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_PM_AREA_SMPC = (0xb0cc0103) # type: ignore +NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS_MESSAGE_ID = (0x3) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_PM_AREA_SMPC = (0xb0cc0104) # type: ignore +NVB0CC_CTRL_CMD_ALLOC_PMA_STREAM = (0xb0cc0105) # type: ignore +NVB0CC_PMA_BUFFER_SIZE_MAX = (0xffe00000) # type: ignore +NVB0CC_PMA_BYTES_AVAILABLE_SIZE = (0x1000) # type: ignore +NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS_MESSAGE_ID = (0x5) # type: ignore +NVB0CC_CTRL_CMD_FREE_PMA_STREAM = (0xb0cc0106) # type: ignore +NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS_MESSAGE_ID = (0x6) # type: ignore +NVB0CC_CTRL_CMD_BIND_PM_RESOURCES = (0xb0cc0107) # type: ignore +NVB0CC_CTRL_CMD_UNBIND_PM_RESOURCES = (0xb0cc0108) # type: ignore +NVB0CC_CTRL_CMD_PMA_STREAM_UPDATE_GET_PUT = (0xb0cc0109) # type: ignore +NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE = 0xFFFFFFFF # type: ignore +NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS_MESSAGE_ID = (0x9) # type: ignore +NVB0CC_REGOPS_MAX_COUNT = (124) # type: ignore +NVB0CC_CTRL_CMD_EXEC_REG_OPS = (0xb0cc010a) # type: ignore +NVB0CC_CTRL_EXEC_REG_OPS_PARAMS_MESSAGE_ID = (0xA) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_PM_AREA_PC_SAMPLER = (0xb0cc010b) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_PM_AREA_PC_SAMPLER = (0xb0cc010c) # type: ignore +NVB0CC_CTRL_CMD_GET_TOTAL_HS_CREDITS = (0xb0cc010d) # type: ignore +NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS_MESSAGE_ID = (0xD) # type: ignore +NVB0CC_CTRL_CMD_SET_HS_CREDITS = (0xb0cc010e) # type: ignore +NVB0CC_MAX_CREDIT_INFO_ENTRIES = (63) # type: ignore +NVB0CC_CTRL_SET_HS_CREDITS_PARAMS_MESSAGE_ID = (0xE) # type: ignore +NVB0CC_CTRL_CMD_GET_HS_CREDITS = (0xb0cc010f) # type: ignore 
+NVB0CC_CTRL_GET_HS_CREDITS_PARAMS_MESSAGE_ID = (0xF) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_HES = (0xb0cc0113) # type: ignore +NVB0CC_CTRL_RESERVE_HES_PARAMS_MESSAGE_ID = (0x13) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_HES = (0xb0cc0114) # type: ignore +NVB0CC_CTRL_RELEASE_HES_PARAMS_MESSAGE_ID = (0x14) # type: ignore +NVB0CC_CREDIT_POOL_MAX_COUNT = 30 # type: ignore +NVB0CC_CTRL_CMD_GET_CHIPLET_HS_CREDIT_POOL = (0xb0cc0115) # type: ignore +NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL_MESSAGE_ID = (0x15) # type: ignore +NVB0CC_CTRL_CMD_GET_HS_CREDITS_MAPPING = (0xb0cc0116) # type: ignore +NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS_MESSAGE_ID = (0x16) # type: ignore +NVB0CC_CTRL_CMD_DISABLE_DYNAMIC_MMA_BOOST = (0xb0cc0117) # type: ignore +NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS_MESSAGE_ID = (0x17) # type: ignore +NVB0CC_CTRL_CMD_GET_DYNAMIC_MMA_BOOST_STATUS = (0xb0cc0118) # type: ignore +NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS_MESSAGE_ID = (0x18) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_CCU_PROF = (0xb0cc0119) # type: ignore +NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS_MESSAGE_ID = (0x19) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_CCU_PROF = (0xb0cc011a) # type: ignore NV_STATUS_LEVEL_OK = 0 # type: ignore NV_STATUS_LEVEL_WARN = 1 # type: ignore NV_STATUS_LEVEL_ERR = 3 # type: ignore diff --git a/tinygrad/runtime/autogen/nv_580.py b/tinygrad/runtime/autogen/nv_580.py index d3865bf948..a2762e1831 100644 --- a/tinygrad/runtime/autogen/nv_580.py +++ b/tinygrad/runtime/autogen/nv_580.py @@ -1600,6 +1600,12 @@ class struct_NV83DE_ALLOC_PARAMETERS(c.Struct): hClass3dObject: Annotated[NvHandle, 8] NV83DE_ALLOC_PARAMETERS: TypeAlias = struct_NV83DE_ALLOC_PARAMETERS @c.record +class struct_NVB2CC_ALLOC_PARAMETERS(c.Struct): + SIZE = 8 + hClientTarget: Annotated[NvHandle, 0] + hContextTarget: Annotated[NvHandle, 4] +NVB2CC_ALLOC_PARAMETERS: TypeAlias = struct_NVB2CC_ALLOC_PARAMETERS +@c.record class UVM_RESERVE_VA_PARAMS(c.Struct): SIZE = 24 requestedBase: Annotated[NvU64, 0] 
@@ -13042,6 +13048,239 @@ class struct_NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS(c.Struct): SIZE = 4 value: Annotated[NvU32, 0] NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS: TypeAlias = struct_NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS +@c.record +class struct_NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS(c.Struct): + SIZE = 56 + params: Annotated[NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS, 0] +@c.record +class struct_NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS(c.Struct): + SIZE = 56 + hMemPmaBuffer: Annotated[NvHandle, 0] + pmaBufferOffset: Annotated[NvU64, 8] + pmaBufferSize: Annotated[NvU64, 16] + hMemPmaBytesAvailable: Annotated[NvHandle, 24] + pmaBytesAvailableOffset: Annotated[NvU64, 32] + ctxsw: Annotated[NvBool, 40] + pmaChannelIdx: Annotated[NvU32, 44] + pmaBufferVA: Annotated[NvU64, 48] +NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS +NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS: TypeAlias = struct_NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS(c.Struct): + SIZE = 8 + pmaChannelIdx: Annotated[NvU32, 0] + bMembytesPollingRequired: Annotated[NvBool, 4] +NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS(c.Struct): + SIZE = 48 + pmaChannelIdx: Annotated[NvU32, 0] + pmaBufferVA: Annotated[NvU64, 8] + pmaBufferSize: Annotated[NvU64, 16] + membytesVA: Annotated[NvU64, 24] + hwpmIBPA: Annotated[NvU64, 32] + hwpmIBAperture: Annotated[NvU8, 40] +NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS(c.Struct): + SIZE = 5 + bAdminProfilingPermitted: Annotated[NvBool, 0] + bDevProfilingPermitted: Annotated[NvBool, 1] + bCtxProfilingPermitted: 
Annotated[NvBool, 2] + bVideoMemoryProfilingPermitted: Annotated[NvBool, 3] + bSysMemoryProfilingPermitted: Annotated[NvBool, 4] +NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS(c.Struct): + SIZE = 64 + hMemPmaBuffer: Annotated[NvHandle, 0] + pmaBufferOffset: Annotated[NvU64, 8] + pmaBufferSize: Annotated[NvU64, 16] + hMemPmaBytesAvailable: Annotated[NvHandle, 24] + pmaBytesAvailableOffset: Annotated[NvU64, 32] + ctxsw: Annotated[NvBool, 40] + pmaChannelIdx: Annotated[NvU32, 44] + pmaBufferVA: Annotated[NvU64, 48] + bInputPmaChIdx: Annotated[NvBool, 56] +NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS(c.Struct): + SIZE = 4 + pmaChannelIdx: Annotated[NvU32, 0] +NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS(c.Struct): + SIZE = 4 + maxPmaChannels: Annotated[NvU32, 0] +NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS +@c.record +class struct_NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS: TypeAlias = struct_NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS +@c.record +class struct_NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS(c.Struct): + SIZE = 12 + globalStatus: Annotated[NvU32, 0] + controlMask: Annotated[NvU32, 4] + statusMask: Annotated[NvU32, 8] +NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS +@c.record +class struct_NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS(c.Struct): + SIZE = 4 + controlMask: Annotated[NvU32, 0] +NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS: TypeAlias = 
struct_NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS +@c.record +class struct_NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS +@c.record +class struct_NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS +@c.record +class struct_NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS(c.Struct): + SIZE = 4 + pmaChannelIdx: Annotated[NvU32, 0] +NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS: TypeAlias = struct_NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS(c.Struct): + SIZE = 48 + bytesConsumed: Annotated[NvU64, 0] + bUpdateAvailableBytes: Annotated[NvBool, 8] + bWait: Annotated[NvBool, 9] + bytesAvailable: Annotated[NvU64, 16] + bReturnPut: Annotated[NvBool, 24] + putPtr: Annotated[NvU64, 32] + pmaChannelIdx: Annotated[NvU32, 40] + bOverflowStatus: Annotated[NvBool, 44] +NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS +NVB0CC_GPU_REG_OP: TypeAlias = struct_NV2080_CTRL_GPU_REG_OP +class enum_NVB0CC_REGOPS_MODE(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_REGOPS_MODE_ALL_OR_NONE = enum_NVB0CC_REGOPS_MODE.define('NVB0CC_REGOPS_MODE_ALL_OR_NONE', 0) +NVB0CC_REGOPS_MODE_CONTINUE_ON_ERROR = enum_NVB0CC_REGOPS_MODE.define('NVB0CC_REGOPS_MODE_CONTINUE_ON_ERROR', 1) + +NVB0CC_REGOPS_MODE: TypeAlias = enum_NVB0CC_REGOPS_MODE +@c.record +class struct_NVB0CC_CTRL_EXEC_REG_OPS_PARAMS(c.Struct): + SIZE = 3980 + regOpCount: Annotated[NvU32, 0] + mode: Annotated[NVB0CC_REGOPS_MODE, 4] + bPassed: Annotated[NvBool, 8] + bDirect: Annotated[NvBool, 9] + regOps: Annotated[c.Array[NVB0CC_GPU_REG_OP, Literal[124]], 12] +NVB0CC_CTRL_EXEC_REG_OPS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_EXEC_REG_OPS_PARAMS +@c.record +class 
struct_NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS(c.Struct): + SIZE = 4 + numCredits: Annotated[NvU32, 0] +NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS +class enum_NVB0CC_CHIPLET_TYPE(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_CHIPLET_TYPE_INVALID = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_INVALID', 0) +NVB0CC_CHIPLET_TYPE_FBP = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_FBP', 1) +NVB0CC_CHIPLET_TYPE_GPC = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_GPC', 2) +NVB0CC_CHIPLET_TYPE_SYS = enum_NVB0CC_CHIPLET_TYPE.define('NVB0CC_CHIPLET_TYPE_SYS', 3) + +NVB0CC_CHIPLET_TYPE: TypeAlias = enum_NVB0CC_CHIPLET_TYPE +class enum_NVB0CC_HS_CREDITS_CMD_STATUS(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_HS_CREDITS_CMD_STATUS_OK = enum_NVB0CC_HS_CREDITS_CMD_STATUS.define('NVB0CC_HS_CREDITS_CMD_STATUS_OK', 0) +NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CREDITS = enum_NVB0CC_HS_CREDITS_CMD_STATUS.define('NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CREDITS', 1) +NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CHIPLET = enum_NVB0CC_HS_CREDITS_CMD_STATUS.define('NVB0CC_HS_CREDITS_CMD_STATUS_INVALID_CHIPLET', 2) + +NVB0CC_HS_CREDITS_CMD_STATUS: TypeAlias = enum_NVB0CC_HS_CREDITS_CMD_STATUS +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO(c.Struct): + SIZE = 4 + chipletType: Annotated[NvU8, 0] + chipletIndex: Annotated[NvU8, 1] + numCredits: Annotated[NvU16, 2] +NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS(c.Struct): + SIZE = 2 + status: Annotated[NvU8, 0] + entryIndex: Annotated[NvU8, 1] +NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS +@c.record +class struct_NVB0CC_CTRL_HS_CREDITS_PARAMS(c.Struct): + SIZE = 256 + pmaChannelIdx: Annotated[NvU8, 0] + numEntries: Annotated[NvU8, 1] + statusInfo: 
Annotated[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS, 2] + creditInfo: Annotated[c.Array[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO, Literal[63]], 4] +NVB0CC_CTRL_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_HS_CREDITS_PARAMS +NVB0CC_CTRL_SET_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_HS_CREDITS_PARAMS +NVB0CC_CTRL_GET_HS_CREDITS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_HS_CREDITS_PARAMS +class enum_NVB0CC_CTRL_HES_TYPE(Annotated[int, ctypes.c_uint32], c.Enum): pass +NVB0CC_CTRL_HES_INVALID = enum_NVB0CC_CTRL_HES_TYPE.define('NVB0CC_CTRL_HES_INVALID', 0) +NVB0CC_CTRL_HES_CWD = enum_NVB0CC_CTRL_HES_TYPE.define('NVB0CC_CTRL_HES_CWD', 1) + +NVB0CC_CTRL_HES_TYPE: TypeAlias = enum_NVB0CC_CTRL_HES_TYPE +@c.record +class struct_NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS +@c.record +class union_NVB0CC_CTRL_HES_RESERVATION_UNION(c.Struct): + SIZE = 1 + cwd: Annotated[NVB0CC_CTRL_RESERVE_HES_CWD_PARAMS, 0] +NVB0CC_CTRL_HES_RESERVATION_UNION: TypeAlias = union_NVB0CC_CTRL_HES_RESERVATION_UNION +@c.record +class struct_NVB0CC_CTRL_RESERVE_HES_PARAMS(c.Struct): + SIZE = 8 + type: Annotated[NvU32, 0] + reserveParams: Annotated[NVB0CC_CTRL_HES_RESERVATION_UNION, 4] +NVB0CC_CTRL_RESERVE_HES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_HES_PARAMS +@c.record +class struct_NVB0CC_CTRL_RELEASE_HES_PARAMS(c.Struct): + SIZE = 4 + type: Annotated[NVB0CC_CTRL_HES_TYPE, 0] +NVB0CC_CTRL_RELEASE_HES_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RELEASE_HES_PARAMS +@c.record +class struct_NVB0CC_CTRL_CREDIT_POOL_INFO(c.Struct): + SIZE = 4 + numCredits: Annotated[NvU16, 0] + poolIndex: Annotated[NvU8, 2] + chipletType: Annotated[NvU8, 3] +NVB0CC_CTRL_CREDIT_POOL_INFO: TypeAlias = struct_NVB0CC_CTRL_CREDIT_POOL_INFO +@c.record +class struct_NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL(c.Struct): + SIZE = 124 + poolInfos: Annotated[c.Array[NVB0CC_CTRL_CREDIT_POOL_INFO, 
Literal[30]], 0] + poolInfosCount: Annotated[NvU32, 120] +NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL: TypeAlias = struct_NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL +@c.record +class struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO(c.Struct): + SIZE = 3 + chipletType: Annotated[NvU8, 0] + chipletIndex: Annotated[NvU8, 1] + poolIndex: Annotated[NvU8, 2] +NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO: TypeAlias = struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO +@c.record +class struct_NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS(c.Struct): + SIZE = 194 + numQueries: Annotated[NvU16, 0] + statusInfo: Annotated[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_STATUS, 2] + queries: Annotated[c.Array[NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_MAPPING_INFO, Literal[63]], 4] +NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS: TypeAlias = struct_NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS +@c.record +class struct_NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS(c.Struct): + SIZE = 1 + disable: Annotated[NvBool, 0] +NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS: TypeAlias = struct_NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS +@c.record +class struct_NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS(c.Struct): + SIZE = 1 + enabled: Annotated[NvBool, 0] +NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS: TypeAlias = struct_NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS +@c.record +class struct_NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS(c.Struct): + SIZE = 1 + ctxsw: Annotated[NvBool, 0] +NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS: TypeAlias = struct_NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS class nv_status_codes(Annotated[int, ctypes.c_uint32], c.Enum): pass NV_OK = nv_status_codes.define('NV_OK', 0) NV_ERR_GENERIC = nv_status_codes.define('NV_ERR_GENERIC', 65535) @@ -15907,6 +16146,8 @@ NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE = 0x00000004 # type: ignore BLACKWELL_USERMODE_A = (0xc761) # type: ignore GT200_DEBUGGER = (0x83de) # type: ignore NV83DE_ALLOC_PARAMETERS_MESSAGE_ID = (0x83de) # type: ignore 
+MAXWELL_PROFILER_DEVICE = (0xb2cc) # type: ignore +NVB2CC_ALLOC_PARAMETERS_MESSAGE_ID = (0xb2cc) # type: ignore AMPERE_COMPUTE_A = 0xC6C0 # type: ignore NVC6C0_SET_OBJECT = 0x0000 # type: ignore NVC6C0_NO_OPERATION = 0x0100 # type: ignore @@ -23222,6 +23463,132 @@ NV83DE_CTRL_CMD_DEBUG_GET_MODE_MMU_GCC_DEBUG = (0x83de032b) # type: ignore NV83DE_CTRL_DEBUG_GET_MODE_MMU_GCC_DEBUG_PARAMS_MESSAGE_ID = (0x2B) # type: ignore NV83DE_CTRL_CMD_DEBUG_GET_MODE_MMU_GCC_DEBUG_ENABLED = (0x00000001) # type: ignore NV83DE_CTRL_CMD_DEBUG_GET_MODE_MMU_GCC_DEBUG_DISABLED = (0x00000002) # type: ignore +NVB0CC_CTRL_RESERVED = (0x00) # type: ignore +NVB0CC_CTRL_PROFILER = (0x01) # type: ignore +NVB0CC_CTRL_INTERNAL = (0x02) # type: ignore +NVB0CC_CTRL_CMD_NULL = (0xb0cc0000) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM = (0xb0cc0204) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_ALLOC_PMA_STREAM_FINN_PARAMS_MESSAGE_ID = (0x0) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_QUIESCE_PMA_CHANNEL = (0xb0cc0201) # type: ignore +NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS_MESSAGE_ID = (0x1) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_SRIOV_PROMOTE_PMA_STREAM = (0xb0cc0202) # type: ignore +NVB0CC_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM_PARAMS_MESSAGE_ID = (0x2) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_PERMISSIONS_INIT = (0xb0cc0203) # type: ignore +NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS_MESSAGE_ID = (0x3) # type: ignore +NVB0CC_CTRL_INTERNAL_ALLOC_PMA_STREAM_PARAMS_MESSAGE_ID = (0x4) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_FREE_PMA_STREAM = (0xb0cc0206) # type: ignore +NVB0CC_CTRL_INTERNAL_FREE_PMA_STREAM_PARAMS_MESSAGE_ID = (0x6) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_GET_MAX_PMAS = (0xb0cc0207) # type: ignore +NVB0CC_CTRL_INTERNAL_GET_MAX_PMAS_PARAMS_MESSAGE_ID = (0x7) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_BIND_PM_RESOURCES = (0xb0cc0208) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_UNBIND_PM_RESOURCES = (0xb0cc0209) # type: ignore +NVB0CC_CTRL_CMD_INTERNAL_RESERVE_HWPM_LEGACY = 
(0xb0cc020a) # type: ignore +NVB0CC_CTRL_INTERNAL_RESERVE_HWPM_LEGACY_PARAMS_MESSAGE_ID = (0xa) # type: ignore +NVB0CC_CTRL_CMD_POWER_REQUEST_FEATURES = (0xb0cc0301) # type: ignore +NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS_MESSAGE_ID = (0x1) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_IGNORE = (0x00000000) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_DISABLE = (0x00000001) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_ENABLE = (0x00000002) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_IGNORE = NVB0CC_CTRL_POWER_FEATURE_IGNORE # type: ignore 
+NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_DISABLE = NVB0CC_CTRL_POWER_FEATURE_DISABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_ENABLE = NVB0CC_CTRL_POWER_FEATURE_ENABLE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED = (0x00000000) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED = (0x00000001) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED = (0x00000002) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED = (0x00000003) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore 
+NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_FULFILLED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FULFILLED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_REJECTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_REJECTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_NOT_SUPPORTED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_NOT_SUPPORTED # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_REQUEST_FAILED = NVB0CC_CTRL_POWER_FEATURE_REQUEST_FAILED # type: ignore +NVB0CC_CTRL_CMD_POWER_RELEASE_FEATURES = (0xb0cc0302) # type: ignore +NVB0CC_CTRL_POWER_RELEASE_FEATURES_PARAMS_MESSAGE_ID = (0x2) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_RELEASE = (0x00000003) # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_SLCG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # 
type: ignore +NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_RELEASE = NVB0CC_CTRL_POWER_FEATURE_RELEASE # type: ignore +NVB0CC_CTRL_CMD_RESERVE_HWPM_LEGACY = (0xb0cc0101) # type: ignore +NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS_MESSAGE_ID = (0x1) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_HWPM_LEGACY = (0xb0cc0102) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_PM_AREA_SMPC = (0xb0cc0103) # type: ignore +NVB0CC_CTRL_RESERVE_PM_AREA_SMPC_PARAMS_MESSAGE_ID = (0x3) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_PM_AREA_SMPC = (0xb0cc0104) # type: ignore +NVB0CC_CTRL_CMD_ALLOC_PMA_STREAM = (0xb0cc0105) # type: ignore +NVB0CC_PMA_BUFFER_SIZE_MAX = (0xffe00000) # type: ignore +NVB0CC_PMA_BYTES_AVAILABLE_SIZE = (0x1000) # type: ignore +NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS_MESSAGE_ID = (0x5) # type: ignore +NVB0CC_CTRL_CMD_FREE_PMA_STREAM = (0xb0cc0106) # type: ignore +NVB0CC_CTRL_FREE_PMA_STREAM_PARAMS_MESSAGE_ID = (0x6) # type: ignore +NVB0CC_CTRL_CMD_BIND_PM_RESOURCES = (0xb0cc0107) # type: ignore +NVB0CC_CTRL_CMD_UNBIND_PM_RESOURCES = (0xb0cc0108) # type: ignore +NVB0CC_CTRL_CMD_PMA_STREAM_UPDATE_GET_PUT = (0xb0cc0109) # type: ignore +NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE = 0xFFFFFFFF # type: ignore +NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS_MESSAGE_ID = (0x9) # type: ignore +NVB0CC_REGOPS_MAX_COUNT = (124) # type: ignore +NVB0CC_CTRL_CMD_EXEC_REG_OPS = (0xb0cc010a) # type: ignore +NVB0CC_CTRL_EXEC_REG_OPS_PARAMS_MESSAGE_ID = (0xA) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_PM_AREA_PC_SAMPLER = (0xb0cc010b) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_PM_AREA_PC_SAMPLER = (0xb0cc010c) # type: ignore +NVB0CC_CTRL_CMD_GET_TOTAL_HS_CREDITS = (0xb0cc010d) # type: ignore +NVB0CC_CTRL_GET_TOTAL_HS_CREDITS_PARAMS_MESSAGE_ID = (0xD) # type: ignore +NVB0CC_CTRL_CMD_SET_HS_CREDITS = (0xb0cc010e) # type: ignore +NVB0CC_MAX_CREDIT_INFO_ENTRIES = (63) # type: ignore +NVB0CC_CTRL_SET_HS_CREDITS_PARAMS_MESSAGE_ID = (0xE) # type: ignore +NVB0CC_CTRL_CMD_GET_HS_CREDITS = (0xb0cc010f) # type: ignore 
+NVB0CC_CTRL_GET_HS_CREDITS_PARAMS_MESSAGE_ID = (0xF) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_HES = (0xb0cc0113) # type: ignore +NVB0CC_CTRL_RESERVE_HES_PARAMS_MESSAGE_ID = (0x13) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_HES = (0xb0cc0114) # type: ignore +NVB0CC_CTRL_RELEASE_HES_PARAMS_MESSAGE_ID = (0x14) # type: ignore +NVB0CC_CREDIT_POOL_MAX_COUNT = 30 # type: ignore +NVB0CC_CTRL_CMD_GET_CHIPLET_HS_CREDIT_POOL = (0xb0cc0115) # type: ignore +NVB0CC_CTRL_GET_CHIPLET_HS_CREDIT_POOL_MESSAGE_ID = (0x15) # type: ignore +NVB0CC_CTRL_CMD_GET_HS_CREDITS_MAPPING = (0xb0cc0116) # type: ignore +NVB0CC_CTRL_GET_HS_CREDITS_POOL_MAPPING_PARAMS_MESSAGE_ID = (0x16) # type: ignore +NVB0CC_CTRL_CMD_DISABLE_DYNAMIC_MMA_BOOST = (0xb0cc0117) # type: ignore +NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS_MESSAGE_ID = (0x17) # type: ignore +NVB0CC_CTRL_CMD_GET_DYNAMIC_MMA_BOOST_STATUS = (0xb0cc0118) # type: ignore +NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS_MESSAGE_ID = (0x18) # type: ignore +NVB0CC_CTRL_CMD_RESERVE_CCU_PROF = (0xb0cc0119) # type: ignore +NVB0CC_CTRL_RESERVE_CCUPROF_PARAMS_MESSAGE_ID = (0x19) # type: ignore +NVB0CC_CTRL_CMD_RELEASE_CCU_PROF = (0xb0cc011a) # type: ignore NV_STATUS_LEVEL_OK = 0 # type: ignore NV_STATUS_LEVEL_WARN = 1 # type: ignore NV_STATUS_LEVEL_ERR = 3 # type: ignore diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index b378e14a7e..197a85963a 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -6,8 +6,9 @@ from dataclasses import dataclass from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU, hcq_filter_visible_devices, hcq_profile from tinygrad.uop.ops import sint -from tinygrad.device import BufferSpec, CompilerPair, CompilerSet +from tinygrad.device import Compiled, BufferSpec, CompilerPair, CompilerSet from tinygrad.helpers import 
getenv, mv_address, round_up, data64, data64_le, prod, OSX, to_mv, hi32, lo32, NV_CC, NV_PTX, NV_NAK, PROFILE +from tinygrad.helpers import ContextVar, VIZ, ProfileEvent from tinygrad.renderer.ptx import PTXRenderer from tinygrad.renderer.cstyle import NVRenderer from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, NVPTXCompiler, NVCompiler @@ -20,6 +21,11 @@ if getenv("IOCTL"): import extra.nv_gpu_driver.nv_ioctl # noqa: F401 # pylint: d nv_gpu = nv_570 # default to 570 +PMA = ContextVar("PMA", abs(VIZ.value)>=2) + +@dataclass(frozen=True) +class ProfilePMAEvent(ProfileEvent): device:str; kern:str; blob:bytes # noqa: E702 + def get_error_str(status): return f"{status}: {nv_gpu.nv_status_codes.get(status, 'Unknown error')}" NV_PFAULT_FAULT_TYPE = {dt:name for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_FAULT_TYPE_")} @@ -133,6 +139,7 @@ class NVComputeQueue(NVCommandQueue): qmd.set_constant_buf_addr(0, args_state.buf.va_addr) if self.active_qmd is None: + if prg.dev.pma_enabled: self.nvm(1, nv_gpu.NVC6C0_PM_TRIGGER, 0) self.nvm(1, nv_gpu.NVC6C0_SEND_PCAS_A, qmd_buf.va_addr >> 8) self.nvm(1, nv_gpu.NVC6C0_SEND_SIGNALING_PCAS2_B, 9) else: @@ -299,7 +306,11 @@ class NVProgram(HCQProgram): raise RuntimeError(f"Too many resources requested for launch, {prod(local_size)=}, {self.max_threads=}") if any(cur > mx for cur,mx in zip(global_size, [2147483647, 65535, 65535])) or any(cur > mx for cur,mx in zip(local_size, [1024, 1024, 64])): raise RuntimeError(f"Invalid global/local dims {global_size=}, {local_size=}") - return super().__call__(*bufs, global_size=global_size, local_size=local_size, vals=vals, wait=wait) + res = super().__call__(*bufs, global_size=global_size, local_size=local_size, vals=vals, wait=wait) + if self.dev.pma_enabled: + self.dev.synchronize() + if pma_blob:=self.dev._prof_readback(): Compiled.profile_events += [ProfilePMAEvent(self.dev.device, self.name, pma_blob)] + return res class 
NVAllocator(HCQAllocator['NVDevice']): def _alloc(self, size:int, options:BufferSpec) -> HCQBuffer: @@ -382,7 +393,8 @@ class NVKIface: def rm_control(self, obj, cmd, params=None): nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_CONTROL, made:=nv_gpu.NVOS54_PARAMETERS(hClient=self.root, hObject=obj, cmd=cmd, - paramsSize=ctypes.sizeof(params), params=ctypes.cast(ctypes.byref(params), ctypes.c_void_p) if params is not None else None)) + paramsSize=ctypes.sizeof(params) if params is not None else 0, + params=ctypes.cast(ctypes.byref(params), ctypes.c_void_p) if params is not None else None)) if made.status != 0: raise RuntimeError(f"rm_control returned {get_error_str(made.status)}") return params @@ -458,7 +470,8 @@ class NVKIface: | (nv_gpu.NVOS32_ATTR_PAGE_SIZE_HUGE if page_size > 0x1000 else 0) << 23 | ((nv_gpu.NVOS32_ATTR_LOCATION_PCI if uncached else 0) << 25) attr2 = ((nv_gpu.NVOS32_ATTR2_GPU_CACHEABLE_NO if uncached else nv_gpu.NVOS32_ATTR2_GPU_CACHEABLE_YES) << 2) \ - | ((nv_gpu.NVOS32_ATTR2_PAGE_SIZE_HUGE_2MB if page_size > 0x1000 else 0) << 20) | nv_gpu.NVOS32_ATTR2_ZBC_PREFER_NO_ZBC + | ((nv_gpu.NVOS32_ATTR2_PAGE_SIZE_HUGE_2MB if page_size > 0x1000 else 0) << 20) | nv_gpu.NVOS32_ATTR2_ZBC_PREFER_NO_ZBC \ + | ((nv_gpu.NVOS32_ATTR2_PROTECTION_USER_READ_ONLY << 22) if kwargs.get('read_only') else 0) fl = nv_gpu.NVOS32_ALLOC_FLAGS_MAP_NOT_REQUIRED | nv_gpu.NVOS32_ALLOC_FLAGS_MEMORY_HANDLE_PROVIDED | nv_gpu.NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE \ | nv_gpu.NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT | (nv_gpu.NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM if not uncached else 0) @@ -565,17 +578,17 @@ class NVDevice(HCQCompiled[HCQSignal]): self.iface.setup_vm(vaspace) channel_params = nv_gpu.NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS(engineType=nv_gpu.NV2080_ENGINE_TYPE_GRAPHICS) - channel_group = self.iface.rm_alloc(self.nvdevice, nv_gpu.KEPLER_CHANNEL_GROUP_A, channel_params) + self.channel_group = self.iface.rm_alloc(self.nvdevice, nv_gpu.KEPLER_CHANNEL_GROUP_A, channel_params) 
self.gpfifo_area = self.iface.alloc(0x300000, contiguous=True, cpu_access=True, force_devmem=True, map_flags=(nv_gpu.NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED<<23)) ctxshare_params = nv_gpu.NV_CTXSHARE_ALLOCATION_PARAMETERS(hVASpace=vaspace, flags=nv_gpu.NV_CTXSHARE_ALLOCATION_FLAGS_SUBCONTEXT_ASYNC) - ctxshare = self.iface.rm_alloc(channel_group, nv_gpu.FERMI_CONTEXT_SHARE_A, ctxshare_params) + ctxshare = self.iface.rm_alloc(self.channel_group, nv_gpu.FERMI_CONTEXT_SHARE_A, ctxshare_params) - self.compute_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, channel_group, offset=0, entries=0x10000, compute=True) - self.dma_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False) - self.iface.rm_control(channel_group, nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06C_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1)) + self.compute_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, self.channel_group, offset=0, entries=0x10000, compute=True) + self.dma_gpfifo = self._new_gpu_fifo(self.gpfifo_area, ctxshare, self.channel_group, offset=0x100000, entries=0x10000, compute=False) + self.iface.rm_control(self.channel_group, nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06C_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1)) self.cmdq_page:HCQBuffer = self.iface.alloc(0x200000, cpu_access=True) self.cmdq_allocator = BumpAllocator(size=self.cmdq_page.size, base=int(self.cmdq_page.va_addr), wrap=True) @@ -594,6 +607,9 @@ class NVDevice(HCQCompiled[HCQSignal]): CompilerPair(functools.partial(NAKRenderer, self.arch, self.max_warps_per_sm), None, NV_NAK)]) super().__init__(device, NVAllocator(self), compilers, functools.partial(NVProgram, self), HCQSignal, NVComputeQueue, NVCopyQueue) + self.pma_enabled = PMA.value > 0 and PROFILE >= 1 + if self.pma_enabled: self._prof_init() + self._setup_gpfifos() def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False, video=False) -> GPFifo: 
# Preceding patch context (tail of an earlier NVDevice method, outside this review), kept verbatim:
#   @@ -709,3 +725,82 @@ class NVDevice(HCQCompiled[HCQSignal]):
#   if e.hwwGlobalEsr or e.hwwWarpEsr: report += [f"SM {i} fault: esr={e.hwwGlobalEsr} warp_esr={e.hwwWarpEsr:#x} warp_pc={e.hwwWarpEsrPc64:#x}"]
#   raise RuntimeError("\n".join(report))

# The four definitions below are the methods this hunk adds to NVDevice (PMA profiler / PC sampling).

def _prof_init(self):
  """Allocate the profiler object, request power features, create the PMA stream and start PC-sampling setup.

  Sets self.profiler, self.pma_buf, self.pma_bytes and self.pma_rptr. Asserts when the device is an NVD
  device or its compute class is not ADA_COMPUTE_A.
  """
  assert not self.is_nvd() and self.iface.compute_class is nv_gpu.ADA_COMPUTE_A, "not supported for PMA profiling"

  # the profiler object is a child of the subdevice and targets this device's channel group
  alloc_params = nv_gpu.NVB2CC_ALLOC_PARAMETERS(hClientTarget=self.iface.root, hContextTarget=self.channel_group)
  self.profiler = self.iface.rm_alloc(self.subdevice, nv_gpu.MAXWELL_PROFILER_DEVICE, alloc_params)

  # each power feature value is OR-ed into controlMask at its own bit offset
  feature_shifts = ((nv_gpu.NVB0CC_CTRL_POWER_FEATURE_MASK_ELCG_DISABLE, 0), (nv_gpu.NVB0CC_CTRL_POWER_FEATURE_MASK_BLCG_DISABLE, 2),
                    (nv_gpu.NVB0CC_CTRL_POWER_FEATURE_MASK_ELPG_DISABLE, 6), (nv_gpu.NVB0CC_CTRL_POWER_FEATURE_MASK_IDLE_SLOWDOWN_DISABLE, 8),
                    (nv_gpu.NVB0CC_CTRL_POWER_FEATURE_MASK_VAT_DISABLE, 10))
  control_mask = 0
  for feature, shift in feature_shifts: control_mask |= feature << shift
  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_POWER_REQUEST_FEATURES,
                        nv_gpu.struct_NVB0CC_CTRL_POWER_REQUEST_FEATURES_PARAMS(controlMask=control_mask))

  # PMA_BUFFER_SIZE (default 512) is scaled by 1 << 20 before being passed to alloc
  pma_buffer_size = getenv("PMA_BUFFER_SIZE", 512) << 20
  self.pma_buf = self.iface.alloc(pma_buffer_size, uncached=True, cpu_cached=True, cpu_access=True)
  self.pma_bytes = self.iface.alloc(0x1000, uncached=True, cpu_cached=True, read_only=True)
  self.pma_rptr = 0  # read offset into pma_buf, advanced by _prof_readback

  stream_params = nv_gpu.struct_NVB0CC_CTRL_ALLOC_PMA_STREAM_PARAMS(
    hMemPmaBuffer=self.pma_buf.meta.hMemory, pmaBufferSize=self.pma_buf.size,
    hMemPmaBytesAvailable=self.pma_bytes.meta.hMemory, pmaBufferVA=self.pma_buf.va_addr)
  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_ALLOC_PMA_STREAM, stream_params)

  # reservations are issued in this order, then the resources are bound
  legacy_params = nv_gpu.struct_NVB0CC_CTRL_RESERVE_HWPM_LEGACY_PARAMS(ctxsw=0)
  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_RESERVE_HWPM_LEGACY, legacy_params)
  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_RESERVE_PM_AREA_PC_SAMPLER)
  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_BIND_PM_RESOURCES)

  self._prof_setup_pc_sampling()

def _prof_setup_pc_sampling(self):
  """Program the PMA and per-SM registers used for PC sampling through reg_ops / rm_control.

  Queries the TPC mask of every GPC index below self.num_gpcs and only uses the non-zero ones.
  Register offsets and values are undocumented magic numbers carried over unchanged.
  """
  PMASYS_BASE, PMAGPC_BASE, GR_GPC_BASE, GPC_BASE = 0x24a000, 0x244000, 0x419000, 0x180000

  # collect the non-zero TPC masks, one query per GPC index
  tpc_masks = []
  for gpc_id in range(self.num_gpcs):
    query = nv_gpu.NV2080_CTRL_GR_GET_TPC_MASK_PARAMS(gpcId=gpc_id)
    mask = self.iface.rm_control(self.subdevice, nv_gpu.NV2080_CTRL_CMD_GR_GET_TPC_MASK, query).tpcMask
    if mask > 0: tpc_masks.append(mask)
  tpc_cnt = [bin(mask).count('1') for mask in tpc_masks]  # set bits per kept mask
  gpc_cnt = len(tpc_masks)

  # enables pma on gpc
  pma_enable_ops = [(PMAGPC_BASE + gpc * 0x200, 0x100, 0x100) for gpc in range(gpc_cnt)]
  self.reg_ops(*pma_enable_ops)

  # sets streaming bw for each gpc: one credit per TPC present in the mask
  hs = nv_gpu.struct_NVB0CC_CTRL_HS_CREDITS_PARAMS(pmaChannelIdx=0, numEntries=gpc_cnt)
  for idx, credits in enumerate(tpc_cnt):
    hs.creditInfo[idx] = nv_gpu.struct_NVB0CC_CTRL_PMA_STREAM_HS_CREDITS_INFO(
      chipletType=nv_gpu.NVB0CC_CHIPLET_TYPE_GPC, chipletIndex=idx, numCredits=credits)
  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_SET_HS_CREDITS, hs)

  pmasys_ops = [(PMASYS_BASE + 0x65c + word * 4, 0xffffffff) for word in range(self.num_gpcs * 2)]
  self.reg_ops(*pmasys_ops)
  self.reg_ops((PMASYS_BASE + 0x620, 0x2000007))

  # tpc addressing is right aligned: the TPC slot is offset by (num_tpc_per_gpc - TPCs in this GPC)
  def sm_reg(gpc, tpc, sm, reg):
    tpc_slot = self.num_tpc_per_gpc - tpc_cnt[gpc] + tpc
    return GPC_BASE + gpc * 0x4000 + tpc_slot * 0x200 + [0x400, 0x1000][sm] + reg

  sm_ops = []
  for gpc, n_tpc in enumerate(tpc_cnt):
    for tpc in range(n_tpc):
      for sm in range(2):
        per_sm = (
          (0x128, (gpc << 5) | (tpc << 1) | sm),  # enumeration. NOTE: different from cuda
          (0x40, 0x19181716), (0x48, 0x1d1c1b1a), (0x50, 0x1e201f),  # unk, counters?
          (0xec, 0x1), (0x6c, 0x2), (0x9c, 0x5), (0x108, 0x20))
        sm_ops.extend((sm_reg(gpc, tpc, sm, reg), value) for reg, value in per_sm)
  self.reg_ops(*sm_ops)

  # enable pc sampling for the context
  self.reg_ops((GR_GPC_BASE + 0xbdc, 0x1), reg_type=1)

def reg_ops(self, *ops, reg_type=0, op=nv_gpu.NV2080_CTRL_GPU_REG_OP_WRITE_32):
  """Submit register operations through NVB0CC_CTRL_CMD_EXEC_REG_OPS, at most 124 per call.

  Each entry of ops is (offset, value) or (offset, value, mask); when the third element is missing,
  regAndNMaskLo is set to 0xffffffff. reg_type and op are applied to every entry.
  A RuntimeError raised by an individual rm_control call is ignored.
  """
  batch_size = 124
  for first in range(0, len(ops), batch_size):
    batch = ops[first:first + batch_size]
    params = nv_gpu.struct_NVB0CC_CTRL_EXEC_REG_OPS_PARAMS(regOpCount=len(batch))
    for slot, entry in enumerate(batch):
      offset, value, *extra = entry
      and_n_mask = extra[0] if extra else 0xffffffff
      params.regOps[slot] = nv_gpu.struct_NV2080_CTRL_GPU_REG_OP(regOp=op, regType=reg_type,
        regOffset=offset, regValueLo=value, regAndNMaskLo=and_n_mask)
    # best effort: a failing batch is skipped and the remaining batches are still submitted
    try: self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_EXEC_REG_OPS, params)
    except RuntimeError: pass

def _prof_readback(self) -> bytes|None:
  """Return the bytes newly available in the PMA buffer, or None when there are none.

  Reads from self.pma_rptr, wrapping around the end of the buffer, advances self.pma_rptr and then
  reports the consumed byte count back to the driver.
  Raises RuntimeError when the driver reports an overflow.
  """
  status = self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_PMA_STREAM_UPDATE_GET_PUT,
    nv_gpu.struct_NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS(bUpdateAvailableBytes=1, bWait=1))

  if status.bOverflowStatus: raise RuntimeError("PMA profiler: buffer overflow detected")
  if status.bytesAvailable == 0: return None

  buf_size = self.pma_buf.size
  begin, finish = self.pma_rptr, self.pma_rptr + status.bytesAvailable
  # first slice runs up to the end of the buffer; second slice is the wrapped part (empty when nothing wraps)
  head = self.pma_buf.cpu_view()[begin:min(finish, buf_size)]
  wrapped = self.pma_buf.cpu_view()[:max(0, finish - buf_size)]
  pma_data = head + wrapped
  self.pma_rptr = finish % buf_size

  self.iface.rm_control(self.profiler, nv_gpu.NVB0CC_CTRL_CMD_PMA_STREAM_UPDATE_GET_PUT,
    nv_gpu.struct_NVB0CC_CTRL_PMA_STREAM_UPDATE_GET_PUT_PARAMS(bytesConsumed=status.bytesAvailable))
  return pma_data