diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index 6147f34271..8bda3fd7d4 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -478,9 +478,7 @@ class PCIIface(PCIIfaceBase): def setup_gpfifo_vm(self, gpfifo): pass def rm_alloc(self, parent, clss, params=None, root=None) -> int: return self.dev_impl.gsp.rpc_rm_alloc(parent, clss, params, self.root) - def rm_control(self, obj, cmd, params=None): - res = self.dev_impl.gsp.rpc_rm_control(obj, cmd, params, self.root) - return type(params).from_buffer_copy(res) if params is not None else None + def rm_control(self, obj, cmd, params=None): return self.dev_impl.gsp.rpc_rm_control(obj, cmd, params, self.root) def device_fini(self): self.dev_impl.fini() diff --git a/tinygrad/runtime/support/nv/ip.py b/tinygrad/runtime/support/nv/ip.py index b53d5a18c1..8cdbb90670 100644 --- a/tinygrad/runtime/support/nv/ip.py +++ b/tinygrad/runtime/support/nv/ip.py @@ -374,10 +374,17 @@ class NV_GSP(NV_IP): hVASpace=vaspace, userdOffset=(ctypes.c_uint64*8)(0x20 * 8), userdMem=userd, internalFlags=0x1a, flags=0x200320) ch_gpfifo = self.rpc_rm_alloc(hParent=dev, hClass=nv_gpu.AMPERE_CHANNEL_GPFIFO_A, params=gg_params) - self.grctx_bufs = {0: GRBufDesc(0x237000, p=1, v=1), 1: GRBufDesc(0x6000, p=1, v=1, lc=1), 2: GRBufDesc(0x6000, p=1, v=1), - 3: GRBufDesc(0x3000, p=0, v=1), 4: GRBufDesc(0x20000, p=0, v=1), 5: GRBufDesc(0x2600000, p=0, v=1), 6: GRBufDesc(0x80000, p=0, v=1), - 9: GRBufDesc(0x10000, p=1, v=1), 10: GRBufDesc(0x80000, p=1, v=0), 11: GRBufDesc(0x80000, p=1, v=1)} + gr_ctx_bufs_info = self.rpc_rm_control(hObject=subdev, cmd=nv_gpu.NV2080_CTRL_CMD_INTERNAL_STATIC_KGR_GET_CONTEXT_BUFFERS_INFO, + params=nv_gpu.NV2080_CTRL_INTERNAL_STATIC_KGR_GET_CONTEXT_BUFFERS_INFO_PARAMS()).engineContextBuffersInfo[0] + def _ctx_info(idx, add=0, align=None): return round_up(gr_ctx_bufs_info.engine[idx].size + add, align or gr_ctx_bufs_info.engine[idx].alignment) + # Setup graphics context + gr_size = _ctx_info(nv_gpu.NV0080_CTRL_FIFO_GET_ENGINE_CONTEXT_PROPERTIES_ENGINE_ID_GRAPHICS, add=0x40000) + patch_size = _ctx_info(nv_gpu.NV0080_CTRL_FIFO_GET_ENGINE_CONTEXT_PROPERTIES_ENGINE_ID_GRAPHICS_PATCH) + cfgs_sizes = {x: _ctx_info(x + 14, align=(2 << 20) if x == 5 else None) for x in range(3, 11)} # indices 3–10 are mapped to 17–24 + self.grctx_bufs = {0: GRBufDesc(gr_size, p=1, v=1), 1: GRBufDesc(patch_size, p=1, v=1, lc=1), 2: GRBufDesc(patch_size, p=1, v=1), + **{x: GRBufDesc(cfgs_sizes[x], p=0, v=1) for x in range(3, 7)}, 9: GRBufDesc(cfgs_sizes[9], p=1, v=1), + 10: GRBufDesc(cfgs_sizes[10], p=1, v=0), 11: GRBufDesc(cfgs_sizes[10], p=1, v=1)} # NOTE: 11 reuses cfgs_sizes[10] self.promote_ctx(self.priv_root, subdev, ch_gpfifo, {k:v for k, v in self.grctx_bufs.items() if v.lc == 0}) self.rpc_rm_alloc(hParent=ch_gpfifo, hClass=nv_gpu.ADA_COMPUTE_A, params=None) @@ -425,7 +432,8 @@ class NV_GSP(NV_IP): control_args = nv.rpc_gsp_rm_control_v(hClient=(client:=client or self.priv_root), hObject=hObject, cmd=cmd, flags=0x0, paramsSize=ctypes.sizeof(params) if params is not None else 0x0) self.cmd_q.send_rpc(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, bytes(control_args) + (bytes(params) if params is not None else b'')) - return self.stat_q.wait_resp(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL)[len(bytes(control_args)):] + res = self.stat_q.wait_resp(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL) + return type(params).from_buffer_copy(res[len(bytes(control_args)):]) if params is not None else None def rpc_set_page_directory(self, device, hVASpace, pdir_paddr, client=None, pasid=0xffffffff): params = nv.struct_NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS_v1E_05(physAddress=pdir_paddr,