diff --git a/autogen_stubs.sh b/autogen_stubs.sh index 2f7709fd84..38a2a9993e 100755 --- a/autogen_stubs.sh +++ b/autogen_stubs.sh @@ -119,6 +119,12 @@ generate_nv() { sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x) sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num> + + # Parse status codes + sed -n '1i\ +nv_status_codes = {} +/^NV_STATUS_CODE/ { s/^NV_STATUS_CODE(\([^,]*\), *\([^,]*\), *"\([^"]*\)") *.*$/\1 = \2\nnv_status_codes[\1] = "\3"/; p }' $NVKERN_SRC/src/common/sdk/nvidia/inc/nvstatuscodes.h >> $BASE/nv_gpu.py + python3 -c "import tinygrad.runtime.autogen.nv_gpu" } diff --git a/tinygrad/runtime/autogen/nv_gpu.py b/tinygrad/runtime/autogen/nv_gpu.py index ee66193b93..4c60ecf394 100644 --- a/tinygrad/runtime/autogen/nv_gpu.py +++ b/tinygrad/runtime/autogen/nv_gpu.py @@ -33326,3 +33326,272 @@ __all__ = \ 'union_NV2080_CTRL_NVLINK_CALLBACK_TYPE_callbackParams', 'union_NV2080_CTRL_NVLINK_INJECT_TLC_ERROR_TYPE', 'union_RM_GSP_SPDM_CMD', 'union_c__SA_NVOS32_PARAMETERS_data'] +nv_status_codes = {} +NV_OK = 0x00000000 +nv_status_codes[NV_OK] = "Success" +NV_ERR_GENERIC = 0x0000FFFF +nv_status_codes[NV_ERR_GENERIC] = "Failure: Generic Error" +NV_ERR_BROKEN_FB = 0x00000001 +nv_status_codes[NV_ERR_BROKEN_FB] = "Frame-Buffer broken" +NV_ERR_BUFFER_TOO_SMALL = 0x00000002 +nv_status_codes[NV_ERR_BUFFER_TOO_SMALL] = "Buffer passed in is too small" +NV_ERR_BUSY_RETRY = 0x00000003 +nv_status_codes[NV_ERR_BUSY_RETRY] = "System is busy, retry later" +NV_ERR_CALLBACK_NOT_SCHEDULED = 0x00000004 +nv_status_codes[NV_ERR_CALLBACK_NOT_SCHEDULED] = "The requested callback API not scheduled" +NV_ERR_CARD_NOT_PRESENT = 0x00000005 +nv_status_codes[NV_ERR_CARD_NOT_PRESENT] = "Card not detected" 
+NV_ERR_CYCLE_DETECTED = 0x00000006 +nv_status_codes[NV_ERR_CYCLE_DETECTED] = "Call cycle detected" +NV_ERR_DMA_IN_USE = 0x00000007 +nv_status_codes[NV_ERR_DMA_IN_USE] = "Requested DMA is in use" +NV_ERR_DMA_MEM_NOT_LOCKED = 0x00000008 +nv_status_codes[NV_ERR_DMA_MEM_NOT_LOCKED] = "Requested DMA memory is not locked" +NV_ERR_DMA_MEM_NOT_UNLOCKED = 0x00000009 +nv_status_codes[NV_ERR_DMA_MEM_NOT_UNLOCKED] = "Requested DMA memory is not unlocked" +NV_ERR_DUAL_LINK_INUSE = 0x0000000A +nv_status_codes[NV_ERR_DUAL_LINK_INUSE] = "Dual-Link is in use" +NV_ERR_ECC_ERROR = 0x0000000B +nv_status_codes[NV_ERR_ECC_ERROR] = "Generic ECC error" +NV_ERR_FIFO_BAD_ACCESS = 0x0000000C +nv_status_codes[NV_ERR_FIFO_BAD_ACCESS] = "FIFO: Invalid access" +NV_ERR_FREQ_NOT_SUPPORTED = 0x0000000D +nv_status_codes[NV_ERR_FREQ_NOT_SUPPORTED] = "Requested frequency is not supported" +NV_ERR_GPU_DMA_NOT_INITIALIZED = 0x0000000E +nv_status_codes[NV_ERR_GPU_DMA_NOT_INITIALIZED] = "Requested DMA not initialized" +NV_ERR_GPU_IS_LOST = 0x0000000F +nv_status_codes[NV_ERR_GPU_IS_LOST] = "GPU lost from the bus" +NV_ERR_GPU_IN_FULLCHIP_RESET = 0x00000010 +nv_status_codes[NV_ERR_GPU_IN_FULLCHIP_RESET] = "GPU currently in full-chip reset" +NV_ERR_GPU_NOT_FULL_POWER = 0x00000011 +nv_status_codes[NV_ERR_GPU_NOT_FULL_POWER] = "GPU not in full power" +NV_ERR_GPU_UUID_NOT_FOUND = 0x00000012 +nv_status_codes[NV_ERR_GPU_UUID_NOT_FOUND] = "GPU UUID not found" +NV_ERR_HOT_SWITCH = 0x00000013 +nv_status_codes[NV_ERR_HOT_SWITCH] = "System in hot switch" +NV_ERR_I2C_ERROR = 0x00000014 +nv_status_codes[NV_ERR_I2C_ERROR] = "I2C Error" +NV_ERR_I2C_SPEED_TOO_HIGH = 0x00000015 +nv_status_codes[NV_ERR_I2C_SPEED_TOO_HIGH] = "I2C Error: Speed too high" +NV_ERR_ILLEGAL_ACTION = 0x00000016 +nv_status_codes[NV_ERR_ILLEGAL_ACTION] = "Current action is not allowed" +NV_ERR_IN_USE = 0x00000017 +nv_status_codes[NV_ERR_IN_USE] = "Generic busy error" +NV_ERR_INFLATE_COMPRESSED_DATA_FAILED = 0x00000018 
+nv_status_codes[NV_ERR_INFLATE_COMPRESSED_DATA_FAILED] = "Failed to inflate compressed data" +NV_ERR_INSERT_DUPLICATE_NAME = 0x00000019 +nv_status_codes[NV_ERR_INSERT_DUPLICATE_NAME] = "Found a duplicate entry in the requested btree" +NV_ERR_INSUFFICIENT_RESOURCES = 0x0000001A +nv_status_codes[NV_ERR_INSUFFICIENT_RESOURCES] = "Ran out of a critical resource, other than memory" +NV_ERR_INSUFFICIENT_PERMISSIONS = 0x0000001B +nv_status_codes[NV_ERR_INSUFFICIENT_PERMISSIONS] = "The requester does not have sufficient permissions" +NV_ERR_INSUFFICIENT_POWER = 0x0000001C +nv_status_codes[NV_ERR_INSUFFICIENT_POWER] = "Generic Error: Low power" +NV_ERR_INVALID_ACCESS_TYPE = 0x0000001D +nv_status_codes[NV_ERR_INVALID_ACCESS_TYPE] = "This type of access is not allowed" +NV_ERR_INVALID_ADDRESS = 0x0000001E +nv_status_codes[NV_ERR_INVALID_ADDRESS] = "Address not valid" +NV_ERR_INVALID_ARGUMENT = 0x0000001F +nv_status_codes[NV_ERR_INVALID_ARGUMENT] = "Invalid argument to call" +NV_ERR_INVALID_BASE = 0x00000020 +nv_status_codes[NV_ERR_INVALID_BASE] = "Invalid base" +NV_ERR_INVALID_CHANNEL = 0x00000021 +nv_status_codes[NV_ERR_INVALID_CHANNEL] = "Given channel-id not valid" +NV_ERR_INVALID_CLASS = 0x00000022 +nv_status_codes[NV_ERR_INVALID_CLASS] = "Given class-id not valid" +NV_ERR_INVALID_CLIENT = 0x00000023 +nv_status_codes[NV_ERR_INVALID_CLIENT] = "Given client not valid" +NV_ERR_INVALID_COMMAND = 0x00000024 +nv_status_codes[NV_ERR_INVALID_COMMAND] = "Command passed is not valid" +NV_ERR_INVALID_DATA = 0x00000025 +nv_status_codes[NV_ERR_INVALID_DATA] = "Invalid data passed" +NV_ERR_INVALID_DEVICE = 0x00000026 +nv_status_codes[NV_ERR_INVALID_DEVICE] = "Current device is not valid" +NV_ERR_INVALID_DMA_SPECIFIER = 0x00000027 +nv_status_codes[NV_ERR_INVALID_DMA_SPECIFIER] = "The requested DMA specifier is not valid" +NV_ERR_INVALID_EVENT = 0x00000028 +nv_status_codes[NV_ERR_INVALID_EVENT] = "Invalid event occurred" +NV_ERR_INVALID_FLAGS = 0x00000029 
+nv_status_codes[NV_ERR_INVALID_FLAGS] = "Invalid flags passed" +NV_ERR_INVALID_FUNCTION = 0x0000002A +nv_status_codes[NV_ERR_INVALID_FUNCTION] = "Called function is not valid" +NV_ERR_INVALID_HEAP = 0x0000002B +nv_status_codes[NV_ERR_INVALID_HEAP] = "Heap corrupted" +NV_ERR_INVALID_INDEX = 0x0000002C +nv_status_codes[NV_ERR_INVALID_INDEX] = "Index invalid" +NV_ERR_INVALID_IRQ_LEVEL = 0x0000002D +nv_status_codes[NV_ERR_INVALID_IRQ_LEVEL] = "Requested IRQ level is not valid" +NV_ERR_INVALID_LIMIT = 0x0000002E +nv_status_codes[NV_ERR_INVALID_LIMIT] = "Generic Error: Invalid limit" +NV_ERR_INVALID_LOCK_STATE = 0x0000002F +nv_status_codes[NV_ERR_INVALID_LOCK_STATE] = "Requested lock state not valid" +NV_ERR_INVALID_METHOD = 0x00000030 +nv_status_codes[NV_ERR_INVALID_METHOD] = "Requested method not valid" +NV_ERR_INVALID_OBJECT = 0x00000031 +nv_status_codes[NV_ERR_INVALID_OBJECT] = "Object not valid" +NV_ERR_INVALID_OBJECT_BUFFER = 0x00000032 +nv_status_codes[NV_ERR_INVALID_OBJECT_BUFFER] = "Object buffer passed is not valid" +NV_ERR_INVALID_OBJECT_HANDLE = 0x00000033 +nv_status_codes[NV_ERR_INVALID_OBJECT_HANDLE] = "Object handle is not valid" +NV_ERR_INVALID_OBJECT_NEW = 0x00000034 +nv_status_codes[NV_ERR_INVALID_OBJECT_NEW] = "New object is not valid" +NV_ERR_INVALID_OBJECT_OLD = 0x00000035 +nv_status_codes[NV_ERR_INVALID_OBJECT_OLD] = "Old object is not valid" +NV_ERR_INVALID_OBJECT_PARENT = 0x00000036 +nv_status_codes[NV_ERR_INVALID_OBJECT_PARENT] = "Object parent is not valid" +NV_ERR_INVALID_OFFSET = 0x00000037 +nv_status_codes[NV_ERR_INVALID_OFFSET] = "The offset passed is not valid" +NV_ERR_INVALID_OPERATION = 0x00000038 +nv_status_codes[NV_ERR_INVALID_OPERATION] = "Requested operation is not valid" +NV_ERR_INVALID_OWNER = 0x00000039 +nv_status_codes[NV_ERR_INVALID_OWNER] = "Owner not valid" +NV_ERR_INVALID_PARAM_STRUCT = 0x0000003A +nv_status_codes[NV_ERR_INVALID_PARAM_STRUCT] = "Invalid structure parameter" +NV_ERR_INVALID_PARAMETER = 0x0000003B 
+nv_status_codes[NV_ERR_INVALID_PARAMETER] = "At least one of the parameters passed is not valid" +NV_ERR_INVALID_PATH = 0x0000003C +nv_status_codes[NV_ERR_INVALID_PATH] = "The requested path is not valid" +NV_ERR_INVALID_POINTER = 0x0000003D +nv_status_codes[NV_ERR_INVALID_POINTER] = "Pointer not valid" +NV_ERR_INVALID_REGISTRY_KEY = 0x0000003E +nv_status_codes[NV_ERR_INVALID_REGISTRY_KEY] = "Found an invalid registry key" +NV_ERR_INVALID_REQUEST = 0x0000003F +nv_status_codes[NV_ERR_INVALID_REQUEST] = "Generic Error: Invalid request" +NV_ERR_INVALID_STATE = 0x00000040 +nv_status_codes[NV_ERR_INVALID_STATE] = "Generic Error: Invalid state" +NV_ERR_INVALID_STRING_LENGTH = 0x00000041 +nv_status_codes[NV_ERR_INVALID_STRING_LENGTH] = "The string length is not valid" +NV_ERR_INVALID_READ = 0x00000042 +nv_status_codes[NV_ERR_INVALID_READ] = "The requested read operation is not valid" +NV_ERR_INVALID_WRITE = 0x00000043 +nv_status_codes[NV_ERR_INVALID_WRITE] = "The requested write operation is not valid" +NV_ERR_INVALID_XLATE = 0x00000044 +nv_status_codes[NV_ERR_INVALID_XLATE] = "The requested translate operation is not valid" +NV_ERR_IRQ_NOT_FIRING = 0x00000045 +nv_status_codes[NV_ERR_IRQ_NOT_FIRING] = "Requested IRQ is not firing" +NV_ERR_IRQ_EDGE_TRIGGERED = 0x00000046 +nv_status_codes[NV_ERR_IRQ_EDGE_TRIGGERED] = "IRQ is edge triggered" +NV_ERR_MEMORY_TRAINING_FAILED = 0x00000047 +nv_status_codes[NV_ERR_MEMORY_TRAINING_FAILED] = "Failed memory training sequence" +NV_ERR_MISMATCHED_SLAVE = 0x00000048 +nv_status_codes[NV_ERR_MISMATCHED_SLAVE] = "Slave mismatch" +NV_ERR_MISMATCHED_TARGET = 0x00000049 +nv_status_codes[NV_ERR_MISMATCHED_TARGET] = "Target mismatch" +NV_ERR_MISSING_TABLE_ENTRY = 0x0000004A +nv_status_codes[NV_ERR_MISSING_TABLE_ENTRY] = "Requested entry missing not found in the table" +NV_ERR_MODULE_LOAD_FAILED = 0x0000004B +nv_status_codes[NV_ERR_MODULE_LOAD_FAILED] = "Failed to load the requested module" +NV_ERR_MORE_DATA_AVAILABLE = 0x0000004C 
+nv_status_codes[NV_ERR_MORE_DATA_AVAILABLE] = "There is more data available" +NV_ERR_MORE_PROCESSING_REQUIRED = 0x0000004D +nv_status_codes[NV_ERR_MORE_PROCESSING_REQUIRED] = "More processing required for the given call" +NV_ERR_MULTIPLE_MEMORY_TYPES = 0x0000004E +nv_status_codes[NV_ERR_MULTIPLE_MEMORY_TYPES] = "Multiple memory types found" +NV_ERR_NO_FREE_FIFOS = 0x0000004F +nv_status_codes[NV_ERR_NO_FREE_FIFOS] = "No more free FIFOs found" +NV_ERR_NO_INTR_PENDING = 0x00000050 +nv_status_codes[NV_ERR_NO_INTR_PENDING] = "No interrupt pending" +NV_ERR_NO_MEMORY = 0x00000051 +nv_status_codes[NV_ERR_NO_MEMORY] = "Out of memory" +NV_ERR_NO_SUCH_DOMAIN = 0x00000052 +nv_status_codes[NV_ERR_NO_SUCH_DOMAIN] = "Requested domain does not exist" +NV_ERR_NO_VALID_PATH = 0x00000053 +nv_status_codes[NV_ERR_NO_VALID_PATH] = "Caller did not specify a valid path" +NV_ERR_NOT_COMPATIBLE = 0x00000054 +nv_status_codes[NV_ERR_NOT_COMPATIBLE] = "Generic Error: Incompatible types" +NV_ERR_NOT_READY = 0x00000055 +nv_status_codes[NV_ERR_NOT_READY] = "Generic Error: Not ready" +NV_ERR_NOT_SUPPORTED = 0x00000056 +nv_status_codes[NV_ERR_NOT_SUPPORTED] = "Call not supported" +NV_ERR_OBJECT_NOT_FOUND = 0x00000057 +nv_status_codes[NV_ERR_OBJECT_NOT_FOUND] = "Requested object not found" +NV_ERR_OBJECT_TYPE_MISMATCH = 0x00000058 +nv_status_codes[NV_ERR_OBJECT_TYPE_MISMATCH] = "Specified objects do not match" +NV_ERR_OPERATING_SYSTEM = 0x00000059 +nv_status_codes[NV_ERR_OPERATING_SYSTEM] = "Generic operating system error" +NV_ERR_OTHER_DEVICE_FOUND = 0x0000005A +nv_status_codes[NV_ERR_OTHER_DEVICE_FOUND] = "Found other device instead of the requested one" +NV_ERR_OUT_OF_RANGE = 0x0000005B +nv_status_codes[NV_ERR_OUT_OF_RANGE] = "The specified value is out of bounds" +NV_ERR_OVERLAPPING_UVM_COMMIT = 0x0000005C +nv_status_codes[NV_ERR_OVERLAPPING_UVM_COMMIT] = "Overlapping unified virtual memory commit" +NV_ERR_PAGE_TABLE_NOT_AVAIL = 0x0000005D +nv_status_codes[NV_ERR_PAGE_TABLE_NOT_AVAIL] = 
"Requested page table not available" +NV_ERR_PID_NOT_FOUND = 0x0000005E +nv_status_codes[NV_ERR_PID_NOT_FOUND] = "Process-Id not found" +NV_ERR_PROTECTION_FAULT = 0x0000005F +nv_status_codes[NV_ERR_PROTECTION_FAULT] = "Protection fault" +NV_ERR_RC_ERROR = 0x00000060 +nv_status_codes[NV_ERR_RC_ERROR] = "Generic RC error" +NV_ERR_REJECTED_VBIOS = 0x00000061 +nv_status_codes[NV_ERR_REJECTED_VBIOS] = "Given Video BIOS rejected/invalid" +NV_ERR_RESET_REQUIRED = 0x00000062 +nv_status_codes[NV_ERR_RESET_REQUIRED] = "Reset required" +NV_ERR_STATE_IN_USE = 0x00000063 +nv_status_codes[NV_ERR_STATE_IN_USE] = "State in use" +NV_ERR_SIGNAL_PENDING = 0x00000064 +nv_status_codes[NV_ERR_SIGNAL_PENDING] = "Signal pending" +NV_ERR_TIMEOUT = 0x00000065 +nv_status_codes[NV_ERR_TIMEOUT] = "Call timed out" +NV_ERR_TIMEOUT_RETRY = 0x00000066 +nv_status_codes[NV_ERR_TIMEOUT_RETRY] = "Call timed out, please retry later" +NV_ERR_TOO_MANY_PRIMARIES = 0x00000067 +nv_status_codes[NV_ERR_TOO_MANY_PRIMARIES] = "Too many primaries" +NV_ERR_UVM_ADDRESS_IN_USE = 0x00000068 +nv_status_codes[NV_ERR_UVM_ADDRESS_IN_USE] = "Unified virtual memory requested address already in use" +NV_ERR_MAX_SESSION_LIMIT_REACHED = 0x00000069 +nv_status_codes[NV_ERR_MAX_SESSION_LIMIT_REACHED] = "Maximum number of sessions reached" +NV_ERR_LIB_RM_VERSION_MISMATCH = 0x0000006A +nv_status_codes[NV_ERR_LIB_RM_VERSION_MISMATCH] = "Library version doesn't match driver version" +NV_ERR_PRIV_SEC_VIOLATION = 0x0000006B +nv_status_codes[NV_ERR_PRIV_SEC_VIOLATION] = "Priv security violation" +NV_ERR_GPU_IN_DEBUG_MODE = 0x0000006C +nv_status_codes[NV_ERR_GPU_IN_DEBUG_MODE] = "GPU currently in debug mode" +NV_ERR_FEATURE_NOT_ENABLED = 0x0000006D +nv_status_codes[NV_ERR_FEATURE_NOT_ENABLED] = "Requested Feature functionality is not enabled" +NV_ERR_RESOURCE_LOST = 0x0000006E +nv_status_codes[NV_ERR_RESOURCE_LOST] = "Requested resource has been destroyed" +NV_ERR_PMU_NOT_READY = 0x0000006F +nv_status_codes[NV_ERR_PMU_NOT_READY] = "PMU 
is not ready or has not yet been initialized" +NV_ERR_FLCN_ERROR = 0x00000070 +nv_status_codes[NV_ERR_FLCN_ERROR] = "Generic falcon assert or halt" +NV_ERR_FATAL_ERROR = 0x00000071 +nv_status_codes[NV_ERR_FATAL_ERROR] = "Fatal/unrecoverable error" +NV_ERR_MEMORY_ERROR = 0x00000072 +nv_status_codes[NV_ERR_MEMORY_ERROR] = "Generic memory error" +NV_ERR_INVALID_LICENSE = 0x00000073 +nv_status_codes[NV_ERR_INVALID_LICENSE] = "License provided is rejected or invalid" +NV_ERR_NVLINK_INIT_ERROR = 0x00000074 +nv_status_codes[NV_ERR_NVLINK_INIT_ERROR] = "Nvlink Init Error" +NV_ERR_NVLINK_MINION_ERROR = 0x00000075 +nv_status_codes[NV_ERR_NVLINK_MINION_ERROR] = "Nvlink Minion Error" +NV_ERR_NVLINK_CLOCK_ERROR = 0x00000076 +nv_status_codes[NV_ERR_NVLINK_CLOCK_ERROR] = "Nvlink Clock Error" +NV_ERR_NVLINK_TRAINING_ERROR = 0x00000077 +nv_status_codes[NV_ERR_NVLINK_TRAINING_ERROR] = "Nvlink Training Error" +NV_ERR_NVLINK_CONFIGURATION_ERROR = 0x00000078 +nv_status_codes[NV_ERR_NVLINK_CONFIGURATION_ERROR] = "Nvlink Configuration Error" +NV_ERR_RISCV_ERROR = 0x00000079 +nv_status_codes[NV_ERR_RISCV_ERROR] = "Generic RISC-V assert or halt" +NV_ERR_FABRIC_MANAGER_NOT_PRESENT = 0x0000007A +nv_status_codes[NV_ERR_FABRIC_MANAGER_NOT_PRESENT] = "Fabric Manager is not loaded" +NV_ERR_ALREADY_SIGNALLED = 0x0000007B +nv_status_codes[NV_ERR_ALREADY_SIGNALLED] = "Semaphore Surface value already >= requested wait value" +NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE = 0x0000007C +nv_status_codes[NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE] = "PMU RPC error due to no queue slot available for this event" +NV_WARN_HOT_SWITCH = 0x00010001 +nv_status_codes[NV_WARN_HOT_SWITCH] = "WARNING Hot switch" +NV_WARN_INCORRECT_PERFMON_DATA = 0x00010002 +nv_status_codes[NV_WARN_INCORRECT_PERFMON_DATA] = "WARNING Incorrect performance monitor data" +NV_WARN_MISMATCHED_SLAVE = 0x00010003 +nv_status_codes[NV_WARN_MISMATCHED_SLAVE] = "WARNING Slave mismatch" +NV_WARN_MISMATCHED_TARGET = 0x00010004 
+nv_status_codes[NV_WARN_MISMATCHED_TARGET] = "WARNING Target mismatch" +NV_WARN_MORE_PROCESSING_REQUIRED = 0x00010005 +nv_status_codes[NV_WARN_MORE_PROCESSING_REQUIRED] = "WARNING More processing required for the call" +NV_WARN_NOTHING_TO_DO = 0x00010006 +nv_status_codes[NV_WARN_NOTHING_TO_DO] = "WARNING Nothing to do" +NV_WARN_NULL_OBJECT = 0x00010007 +nv_status_codes[NV_WARN_NULL_OBJECT] = "WARNING NULL object found" +NV_WARN_OUT_OF_RANGE = 0x00010008 +nv_status_codes[NV_WARN_OUT_OF_RANGE] = "WARNING value out of range" diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index 85b5909274..7d976844df 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -28,20 +28,20 @@ def rm_alloc(fd, clss, root, parant, params): made = nv_gpu.NVOS21_PARAMETERS(hRoot=root, hObjectParent=parant, hClass=clss, pAllocParms=ctypes.cast(ctypes.byref(params), ctypes.POINTER(None)) if params is not None else None) # type: ignore nv_iowr(fd, nv_gpu.NV_ESC_RM_ALLOC, made) - if made.status != 0: raise RuntimeError(f"rm_alloc returned {made.status}") + if made.status != 0: raise RuntimeError(f"rm_alloc returned {made.status}: {nv_gpu.nv_status_codes.get(made.status, 'Unknown error')}") return made def rm_control(fd, cmd, client, obj, params): made = nv_gpu.NVOS54_PARAMETERS(hClient=client, hObject=obj, cmd=cmd, paramsSize=ctypes.sizeof(params), params=ctypes.cast(ctypes.byref(params), ctypes.POINTER(None)) if params is not None else None) # type: ignore nv_iowr(fd, nv_gpu.NV_ESC_RM_CONTROL, made) - if made.status != 0: raise RuntimeError(f"rm_control returned {made.status}") + if made.status != 0: raise RuntimeError(f"rm_control returned {made.status}: {nv_gpu.nv_status_codes.get(made.status, 'Unknown error')}") return made def uvm_ioctl(cmd, sttyp, fd, **kwargs): ret = fcntl.ioctl(fd, cmd, made:=sttyp(**kwargs)) - if ret != 0: raise RuntimeError(f"uvm_ioctl returned {ret}") - if made.rmStatus != 0: raise RuntimeError(f"uvm_ioctl struct returned 
{made.rmStatus}") + if ret != 0: raise RuntimeError(f"ioctl(uvm) returned {ret}") + if made.rmStatus != 0: raise RuntimeError(f"uvm_ioctl returned {made.rmStatus}: {nv_gpu.nv_status_codes.get(made.rmStatus, 'Unknown error')}") return made def make_uvm_type():