hcq: helper for visible devices (#12950)

* hcq: helper for visible devices

* fix

* f
This commit is contained in:
nimlgen
2025-10-28 02:27:56 +08:00
committed by GitHub
parent f2ffe9c8cf
commit 372d9e5753
5 changed files with 10 additions and 12 deletions

View File

@@ -41,7 +41,7 @@ BEAM | [#] | number of beams in kernel beam search
DEFAULT_FLOAT | [HALF, ...]| specify the default float dtype (FLOAT32, HALF, BFLOAT16, FLOAT64, ...), defaults to FLOAT32
IMAGE | [1-2] | enable 2d specific optimizations
FLOAT16 | [1] | use float16 for images instead of float32
VISIBLE_DEVICES | [list[int]]| restricts the NV/AMD devices that are available. The format is a comma-separated list of identifiers (indexing starts with 0).
HCQ_VISIBLE_DEVICES | [list[int]]| restricts the HCQ devices that are available. The format is a comma-separated list of identifiers (indexing starts with 0).
JIT | [0-2] | 0=disabled, 1=[jit enabled](quickstart.md#jit) (default), 2=jit enabled, but graphs are disabled
VIZ | [1] | 0=disabled, 1=[viz enabled](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/viz)
ALLOW_TF32 | [1] | enable TensorFloat-32 tensor cores on Ampere or newer GPUs.

View File

@@ -4,7 +4,7 @@ import os, ctypes, struct, hashlib, functools, importlib, mmap, errno, array, co
assert sys.platform != 'win32'
from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
from tinygrad.runtime.support.hcq import MMIOInterface, BumpAllocator
from tinygrad.runtime.support.hcq import MMIOInterface, BumpAllocator, hcq_filter_visible_devices
from tinygrad.uop.ops import sint
from tinygrad.device import Compiled, DMAFdRef, BufferSpec, CompilerPairT
from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, ProfileEvent, suppress_finalizing, lo32, hi32, colored
@@ -575,9 +575,7 @@ class KFDIface:
if KFDIface.kfd is None:
KFDIface.kfd = FileIOInterface("/dev/kfd", os.O_RDWR)
gpus = [g for g in FileIOInterface(kfd_topo_path).listdir() if self._is_usable_gpu(FileIOInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
gpus = sorted(gpus, key=lambda x: int(x.split('/')[-1]))
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', getenv('HIP_VISIBLE_DEVICES', ''))).split(',') if x.strip()]
KFDIface.gpus = [gpus[x] for x in visible_devices] if visible_devices else gpus
KFDIface.gpus = hcq_filter_visible_devices(sorted(gpus, key=lambda x: int(x.split('/')[-1])))
if device_id >= len(KFDIface.gpus): raise RuntimeError(f"No device found for {device_id}. Requesting more devices than the system has?")

View File

@@ -4,7 +4,7 @@ assert sys.platform != 'win32'
from typing import cast, ClassVar
from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator
from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU
from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU, hcq_filter_visible_devices
from tinygrad.uop.ops import sint
from tinygrad.device import BufferSpec, CompilerPairT
from tinygrad.helpers import getenv, mv_address, round_up, data64, data64_le, prod, OSX, to_mv, hi32, lo32, suppress_finalizing
@@ -321,8 +321,7 @@ class NVKIface:
with contextlib.suppress(RuntimeError): uvm.mm_initialize(self.fd_uvm_2, uvmFd=self.fd_uvm.fd) # this error is okay, CUDA hits it too
nv_iowr(NVKIface.fd_ctl, nv_gpu.NV_ESC_CARD_INFO, gpus_info:=(nv_gpu.nv_ioctl_card_info_t*64)())
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', getenv('CUDA_VISIBLE_DEVICES', ''))).split(',') if x.strip()]
NVKIface.gpus_info = [gpus_info[x] for x in visible_devices] if visible_devices else gpus_info
NVKIface.gpus_info = hcq_filter_visible_devices(gpus_info)
self.dev, self.device_id = dev, device_id
if self.device_id >= len(NVKIface.gpus_info) or not NVKIface.gpus_info[self.device_id].valid:

View File

@@ -57,6 +57,9 @@ if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockFileIOInterf
# **************** for HCQ Compatible Devices ****************
def hcq_filter_visible_devices(dev):
  """Restrict a device sequence to the entries selected by HCQ_VISIBLE_DEVICES.

  The variable is read through `getenv` with an empty-string default and is
  parsed as a comma-separated list of integer indices into `dev`. Entries that
  are empty or whitespace-only are skipped.

  Returns a new list holding `dev[i]` for each parsed index, in the order the
  indices were given. When no indices are parsed (variable unset or blank),
  `dev` itself is returned unchanged.
  """
  raw = getenv('HCQ_VISIBLE_DEVICES', '')
  # Skip blank tokens so values such as "0,,1" or a trailing comma still parse.
  selected = [int(tok) for tok in raw.split(',') if tok.strip()]
  if not selected: return dev
  return [dev[idx] for idx in selected]
SignalType = TypeVar('SignalType', bound='HCQSignal')
HCQDeviceType = TypeVar('HCQDeviceType', bound='HCQCompiled')
ProgramType = TypeVar('ProgramType', bound='HCQProgram')

View File

@@ -2,7 +2,7 @@ import os, mmap, array, functools, ctypes, select, contextlib, dataclasses, sys,
from typing import cast, ClassVar
from tinygrad.helpers import round_up, getenv, OSX, temp, ceildiv
from tinygrad.runtime.autogen import libc, vfio, pci
from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface, HCQBuffer
from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface, HCQBuffer, hcq_filter_visible_devices
from tinygrad.runtime.support.memory import MemoryManager, VirtMapping
from tinygrad.runtime.support.usb import ASM24Controller, USBMMIOInterface
@@ -243,9 +243,7 @@ class LNXPCIIfaceBase:
def __init__(self, dev, dev_id, vendor, devices, bars, vram_bar, va_start, va_size):
if len((cls:=type(self)).gpus) == 0:
cls.gpus = System.pci_scan_bus(vendor, devices)
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', '')).split(',') if x.strip()]
cls.gpus = [cls.gpus[x] for x in visible_devices] if visible_devices else cls.gpus
cls.gpus = hcq_filter_visible_devices(System.pci_scan_bus(vendor, devices))
# Acquire va range to avoid collisions.
FileIOInterface.anon_mmap(va_start, va_size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, 0)