From 2ba96d4c298dfacfc5c1ef7c8fca434ad25f0b50 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Thu, 11 Jul 2024 16:45:03 +0300 Subject: [PATCH] nv use mv_address (#5381) * nv use mv_address * unused import --- tinygrad/runtime/ops_nv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index cf90734496..c7ce1fefd7 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -4,7 +4,7 @@ from typing import Tuple, List, Any, cast from dataclasses import dataclass from tinygrad.device import HCQCompatCompiled, HCQCompatAllocator, HCQCompatAllocRes, HWCommandQueue, HWComputeQueue, HWCopyQueue, hcq_command, \ hcq_profile, Compiler, CompileError, BufferOptions -from tinygrad.helpers import getenv, from_mv, mv_address, init_c_struct_t, to_mv, round_up, to_char_p_p, DEBUG, prod, PROFILE +from tinygrad.helpers import getenv, mv_address, init_c_struct_t, to_mv, round_up, to_char_p_p, DEBUG, prod, PROFILE from tinygrad.renderer.cstyle import NVRenderer from tinygrad.runtime.ops_cuda import check as cuda_check, _get_bytes, CUDACompiler, PTXCompiler, PTX import tinygrad.runtime.autogen.nv_gpu as nv_gpu @@ -98,11 +98,11 @@ class NVCommandQueue(HWCommandQueue): # pylint: disable=abstract-method self.binded_device._gpu_free(self.hw_page) def _wait(self, signal, value=0): - self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(ctypes.addressof(from_mv(signal))), *nvdata64_le(value), + self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(mv_address(signal)), *nvdata64_le(value), (3 << 0) | (1 << 24)] # ACQUIRE | PAYLOAD_SIZE_64BIT def _signal(self, signal, value=0, timestamp=False): - self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(ctypes.addressof(from_mv(signal))), *nvdata64_le(value), + self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(mv_address(signal)), *nvdata64_le(value), (1 
<< 0) | (1 << 20) | (1 << 24) | ((1 << 25) if timestamp else 0)] # RELEASE | RELEASE_WFI | PAYLOAD_SIZE_64BIT | RELEASE_TIMESTAMP self.q += [nvmethod(0, nv_gpu.NVC56F_NON_STALL_INTERRUPT, 1), 0x0] def _timestamp(self, signal): return NVCommandQueue._signal(self, signal, timestamp=True) @@ -181,14 +181,14 @@ class NVComputeQueue(NVCommandQueue, HWComputeQueue): def _signal(self, signal, value=0): if (prev_qmd:=self.cmd_idx_to_qmd.get(len(self) - 2)) is None or prev_qmd.release0_enable == 1: return super()._signal(signal, value) - prev_qmd.release0_address_upper, prev_qmd.release0_address_lower = nvdata64(ctypes.addressof(from_mv(signal))) + prev_qmd.release0_address_upper, prev_qmd.release0_address_lower = nvdata64(mv_address(signal)) prev_qmd.release0_payload_upper, prev_qmd.release0_payload_lower = nvdata64(value) prev_qmd.release0_enable = 1 self.cmd_idx_to_qmd[len(self) - 1] = prev_qmd # this command is embedded into qmd. def _update_signal(self, cmd_idx, signal=None, value=None): if (qmd:=self.cmd_idx_to_qmd.get(cmd_idx)) is None: return super()._update_signal(cmd_idx, signal, value) - if signal is not None: qmd.release0_address_upper, qmd.release0_address_lower = nvdata64(ctypes.addressof(from_mv(signal))) + if signal is not None: qmd.release0_address_upper, qmd.release0_address_lower = nvdata64(mv_address(signal)) if value is not None: qmd.release0_payload_upper, qmd.release0_payload_lower = nvdata64(value) def _submit(self, device): self._submit_to_gpfifo(device, cast(NVDevice, device).compute_gpfifo) @@ -204,7 +204,7 @@ class NVCopyQueue(NVCommandQueue, HWCopyQueue): if src is not None: self._patch(cmd_idx, offset=1, data=nvdata64(src)) def _signal(self, signal, value=0): - self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 4), *nvdata64(ctypes.addressof(from_mv(signal))), value, 4] + self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 4), *nvdata64(mv_address(signal)), value, 4] self.q += [nvmethod(4, nv_gpu.NVC6B5_LAUNCH_DMA, 1), 0x14] def 
_update_signal(self, cmd_idx, signal=None, value=None):