nv use mv_address (#5381)

* nv use mv_address

* unused import
This commit is contained in:
nimlgen
2024-07-11 16:45:03 +03:00
committed by GitHub
parent bd77efda2f
commit 2ba96d4c29

View File

@@ -4,7 +4,7 @@ from typing import Tuple, List, Any, cast
from dataclasses import dataclass
from tinygrad.device import HCQCompatCompiled, HCQCompatAllocator, HCQCompatAllocRes, HWCommandQueue, HWComputeQueue, HWCopyQueue, hcq_command, \
hcq_profile, Compiler, CompileError, BufferOptions
from tinygrad.helpers import getenv, from_mv, mv_address, init_c_struct_t, to_mv, round_up, to_char_p_p, DEBUG, prod, PROFILE
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, to_mv, round_up, to_char_p_p, DEBUG, prod, PROFILE
from tinygrad.renderer.cstyle import NVRenderer
from tinygrad.runtime.ops_cuda import check as cuda_check, _get_bytes, CUDACompiler, PTXCompiler, PTX
import tinygrad.runtime.autogen.nv_gpu as nv_gpu
@@ -98,11 +98,11 @@ class NVCommandQueue(HWCommandQueue): # pylint: disable=abstract-method
self.binded_device._gpu_free(self.hw_page)
def _wait(self, signal, value=0):
# Queue a semaphore command that waits on `signal` for `value` (flags word: ACQUIRE | PAYLOAD_SIZE_64BIT).
# Diff view: the two `self.q +=` lines below are the removed (`ctypes.addressof(from_mv(...))`) and the
# added (`mv_address(...)`) form of the same statement; both share the trailing flags line.
# The appended words are: NVC56F_SEM_ADDR_LO method header, signal address and value (each via nvdata64_le), flags.
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(ctypes.addressof(from_mv(signal))), *nvdata64_le(value),
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(mv_address(signal)), *nvdata64_le(value),
(3 << 0) | (1 << 24)] # ACQUIRE | PAYLOAD_SIZE_64BIT
def _signal(self, signal, value=0, timestamp=False):
# Queue a semaphore release that writes `value` to `signal`; bit 25 (RELEASE_TIMESTAMP) is set only when `timestamp` is true.
# Diff view: the two `self.q +=` lines below are the removed (`ctypes.addressof(from_mv(...))`) and the
# added (`mv_address(...)`) form of the same statement; both share the trailing flags line.
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(ctypes.addressof(from_mv(signal))), *nvdata64_le(value),
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *nvdata64_le(mv_address(signal)), *nvdata64_le(value),
(1 << 0) | (1 << 20) | (1 << 24) | ((1 << 25) if timestamp else 0)] # RELEASE | RELEASE_WFI | PAYLOAD_SIZE_64BIT | RELEASE_TIMESTAMP
# A NVC56F_NON_STALL_INTERRUPT method with data 0x0 is always appended after the release.
self.q += [nvmethod(0, nv_gpu.NVC56F_NON_STALL_INTERRUPT, 1), 0x0]
# Delegates to NVCommandQueue._signal with timestamp=True, leaving `value` at its default of 0.
# The base-class method is named explicitly, so a subclass `_signal` override is not used here.
def _timestamp(self, signal): return NVCommandQueue._signal(self, signal, timestamp=True)
@@ -181,14 +181,14 @@ class NVComputeQueue(NVCommandQueue, HWComputeQueue):
def _signal(self, signal, value=0):
# Fold the signal into the QMD recorded for command index len(self) - 2 when possible.
# Falls back to the parent `_signal` when no QMD is recorded at that index or its release0 slot is already enabled.
if (prev_qmd:=self.cmd_idx_to_qmd.get(len(self) - 2)) is None or prev_qmd.release0_enable == 1: return super()._signal(signal, value)
# Diff view: the next two lines are the removed (`ctypes.addressof(from_mv(...))`) and the added
# (`mv_address(...)`) form of the same assignment of the signal address to the release0 address fields.
prev_qmd.release0_address_upper, prev_qmd.release0_address_lower = nvdata64(ctypes.addressof(from_mv(signal)))
prev_qmd.release0_address_upper, prev_qmd.release0_address_lower = nvdata64(mv_address(signal))
prev_qmd.release0_payload_upper, prev_qmd.release0_payload_lower = nvdata64(value)
prev_qmd.release0_enable = 1
# Register the same QMD under index len(self) - 1 so `_update_signal` can find it by that command index.
self.cmd_idx_to_qmd[len(self) - 1] = prev_qmd # this command is embedded into qmd.
def _update_signal(self, cmd_idx, signal=None, value=None):
# Patch the signal address and/or payload of a signal that was embedded into a QMD.
# If no QMD is recorded for `cmd_idx`, the parent `_update_signal` handles the update instead.
if (qmd:=self.cmd_idx_to_qmd.get(cmd_idx)) is None: return super()._update_signal(cmd_idx, signal, value)
# Diff view: the next two lines are the removed (`ctypes.addressof(from_mv(...))`) and the added
# (`mv_address(...)`) form of the same statement. Arguments left as None keep their current QMD fields.
if signal is not None: qmd.release0_address_upper, qmd.release0_address_lower = nvdata64(ctypes.addressof(from_mv(signal)))
if signal is not None: qmd.release0_address_upper, qmd.release0_address_lower = nvdata64(mv_address(signal))
if value is not None: qmd.release0_payload_upper, qmd.release0_payload_lower = nvdata64(value)
# Submits this queue through `_submit_to_gpfifo` using the device's `compute_gpfifo`.
# `cast` is typing.cast (see the `from typing import` line), so it only informs the type checker.
def _submit(self, device): self._submit_to_gpfifo(device, cast(NVDevice, device).compute_gpfifo)
@@ -204,7 +204,7 @@ class NVCopyQueue(NVCommandQueue, HWCopyQueue):
if src is not None: self._patch(cmd_idx, offset=1, data=nvdata64(src))
def _signal(self, signal, value=0):
# Queue a copy-engine semaphore write: an NVC6B5_SET_SEMAPHORE_A method followed by an NVC6B5_LAUNCH_DMA method.
# Diff view: the two SET_SEMAPHORE_A lines below are the removed (`ctypes.addressof(from_mv(...))`) and the
# added (`mv_address(...)`) form of the same statement.
# NOTE(review): the meaning of the trailing literal 4 and of the LAUNCH_DMA data 0x14 is not visible here; confirm against the NVC6B5 class header.
self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 4), *nvdata64(ctypes.addressof(from_mv(signal))), value, 4]
self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 4), *nvdata64(mv_address(signal)), value, 4]
self.q += [nvmethod(4, nv_gpu.NVC6B5_LAUNCH_DMA, 1), 0x14]
def _update_signal(self, cmd_idx, signal=None, value=None):