From 105fbd792566e0c1702aea303ac8e1695038bfbb Mon Sep 17 00:00:00 2001 From: George Hotz Date: Wed, 1 May 2024 11:17:01 -0700 Subject: [PATCH] add 3080 support to NV --- tinygrad/runtime/ops_nv.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index 1a8fdbd057..366a014946 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -389,9 +389,12 @@ class NVDevice(Compiled): fd_uvm_2 = os.open("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC) NVDevice.root = rm_alloc(self.fd_ctl, nv_gpu.NV01_ROOT_CLIENT, 0, 0, None).hObjectNew uvm.initialize(self.fd_uvm) - uvm.mm_initialize(fd_uvm_2, uvmFd=self.fd_uvm) + try: + uvm.mm_initialize(fd_uvm_2, uvmFd=self.fd_uvm) + except RuntimeError: + pass # this error is okay, CUDA hits it too - NVDevice.gpus_info = (nv_gpu.nv_ioctl_card_info_t*16)() + NVDevice.gpus_info = (nv_gpu.nv_ioctl_card_info_t*64)() nv_iowr(NVDevice.fd_ctl, nv_gpu.NV_ESC_CARD_INFO, NVDevice.gpus_info) # TODO: Get classes from NV0080_CTRL_CMD_GPU_GET_CLASSLIST_V2 @@ -401,6 +404,8 @@ class NVDevice(Compiled): assert NVDevice.gpus_info[self.device_id].valid gpu_info = nv_gpu.NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS(gpuId=NVDevice.gpus_info[self.device_id].gpu_id) rm_control(self.fd_ctl, nv_gpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2, self.root, self.root, gpu_info) + device_id = NVDevice.gpus_info[self.device_id].pci_info.device_id + self.compute_type = nv_gpu.AMPERE_COMPUTE_B if device_id in [0x2204, 0x2206] else nv_gpu.ADA_COMPUTE_A device_params = nv_gpu.NV0080_ALLOC_PARAMETERS(deviceId=gpu_info.deviceInstance, hClientShare=self.root, vaMode=nv_gpu.NV_DEVICE_ALLOCATION_VAMODE_MULTIPLE_VASPACES) @@ -506,7 +511,7 @@ class NVDevice(Compiled): gpFifoOffset=gpfifo.base+offset, gpFifoEntries=entries, hContextShare=ctxshare, hUserdMemory=(ctypes.c_uint32*8)(gpfifo.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset)) gpfifo = rm_alloc(self.fd_ctl, nv_gpu.AMPERE_CHANNEL_GPFIFO_A, self.root, channel_group, params).hObjectNew - rm_alloc(self.fd_ctl, nv_gpu.ADA_COMPUTE_A, self.root, gpfifo, None) + rm_alloc(self.fd_ctl, self.compute_type, self.root, gpfifo, None) rm_alloc(self.fd_ctl, nv_gpu.AMPERE_DMA_COPY_B, self.root, gpfifo, None) ws_token_params = nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS(workSubmitToken=-1) @@ -529,7 +534,7 @@ class NVDevice(Compiled): self.shared_mem_window, self.local_mem_window = 0xfe000000, 0xff000000 queue = HWComputeQueue() - queue.q += [nvmethod(1, nv_gpu.NVC6C0_SET_OBJECT, 1), nv_gpu.ADA_COMPUTE_A] + queue.q += [nvmethod(1, nv_gpu.NVC6C0_SET_OBJECT, 1), self.compute_type] queue.q += [nvmethod(1, nv_gpu.NVC6C0_SET_SHADER_LOCAL_MEMORY_A, 2), *nvdata64(self.shader_local_mem)] queue.q += [nvmethod(1, nv_gpu.NVC6C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A, 3), *nvdata64(bytes_per_tpc), 0x40] queue.q += [nvmethod(1, nv_gpu.NVC6C0_SET_SHADER_LOCAL_MEMORY_WINDOW_A, 2), *nvdata64(self.local_mem_window)]