Files
tinygrad/test/helpers.py
uuuvn 7bc4864bc4 Make dev a property of Allocator (#10286)
* Make `dev` a property of `Allocator`

(this is a prereq refactor for #10285)

At least `BufferXfer.copy` accesses it assuming it's always present,
currently most devices just add this property on their own repeating
the same code over and over again.

This is also a bit footguny, see `RemoteAllocator` that named this
property `device` instead of `dev`, i could obviously just change that
in one place but doing it globally seems like a better solution (and it
reduces code duplication too).

`MallocAllocator` is a bit special, but passing `None` works just fine.

* typing

* ignore type instead of cast
2025-05-13 17:01:01 -07:00

72 lines
3.3 KiB
Python

import time, struct
from typing import Any, Callable, Optional
import numpy as np
from tinygrad import Tensor, dtypes, Device
from tinygrad.ops import UOp, Ops, sint
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.tensor import _to_np_dtype
from tinygrad.engine.realize import Runner
from tinygrad.dtype import ConstType, DType
from tinygrad.nn.state import get_parameters
from tinygrad.helpers import T, unwrap, CI
from tinygrad.codegen import full_rewrite
from tinygrad.runtime.ops_python import PythonProgram, PythonRenderer, PythonCompiler
def derandomize_model(model):
for p in get_parameters(model):
p.replace(Tensor.empty(p.shape, device=p.device, dtype=p.dtype))
p.realize()
def assert_jit_cache_len(fxn, expected_len):
if not fxn.jit_cache:
assert expected_len == 0, expected_len
return
# until we have a better way of typing the prg in ExecItem
if issubclass(type(fxn.jit_cache[0].prg), Runner) and not type(fxn.jit_cache[0].prg).__name__.endswith('Graph'):
assert len(fxn.jit_cache) == expected_len, f"expected {expected_len}, got {len(fxn.jit_cache)}"
else:
assert len(fxn.jit_cache) == 1, len(fxn.jit_cache)
# until we have a better way of typing the prg in ExecItem
assert type(fxn.jit_cache[0].prg).__name__.endswith('Graph')
assert len(fxn.jit_cache[0].prg.jit_cache) == expected_len
def rand_for_dtype(dt:DType, size:int):
if dtypes.is_unsigned(dt):
return np.random.randint(0, 100, size=size, dtype=_to_np_dtype(dt))
elif dtypes.is_int(dt):
return np.random.randint(-100, 100, size=size, dtype=_to_np_dtype(dt))
elif dt == dtypes.bool:
return np.random.choice([True, False], size=size)
return np.random.uniform(-10, 10, size=size).astype(_to_np_dtype(dt))
def ast_const(dtype:DType, val:ConstType, shape:tuple[sint, ...]=(), st:Optional[ShapeTracker]=None, st_src:Optional[tuple[UOp]]=None) -> UOp:
if st_src is None:
st_src = (st.to_uop() if st is not None else ShapeTracker.from_shape(()).reshape((1,)*len(shape)).expand(shape).to_uop(),)
st = unwrap(st_src[0].st)
if all(v.mask is None for v in st.views): return UOp.const(dtype, val).replace(src=(st.to_uop(),))
return UOp.const(dtype, val).valid(st)
def timeit(fxn:Callable[..., T], *args, **kwargs) -> tuple[T, float]:
st = time.perf_counter_ns()
ret = fxn(*args, **kwargs)
return ret, (time.perf_counter_ns()-st)*1e-6
def eval_uop(uop:UOp, inputs:list[tuple[DType, list[Any]]]|None=None):
allocator = Device['PYTHON'].allocator
bufs = []
for buf_dt, data in inputs or []:
bufs.append(buf:=allocator.alloc(len(data) * buf_dt.itemsize))
allocator._copyin(buf, memoryview(struct.pack(str(len(data)) + buf_dt.fmt, *data)))
g = UOp(Ops.DEFINE_GLOBAL, uop.dtype.ptr(), arg=0, src=())
lst = full_rewrite(UOp.store(g.index(UOp.const(dtypes.int, 0)), uop).sink(), PythonRenderer)
prog = PythonProgram("run", PythonCompiler().compile(PythonRenderer().render(lst)))
prog(out_buf:=allocator.alloc(uop.dtype.itemsize), *bufs)
return out_buf.cast(uop.dtype.fmt).tolist()[0]
def not_support_multi_device():
# GPU and CUDA don't support multi device if in CI
return CI and REAL_DEV in ("GPU", "CUDA")
# NOTE: This will open REMOTE if it's the default device
REAL_DEV = (Device.DEFAULT if Device.DEFAULT != "REMOTE" else Device['REMOTE'].properties.real_device)