mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-18 10:31:41 -05:00
test speed llama (#1046)
* test speed llama
* oops, put it back
* uses the real device codegen
* just do it on the mac
* pp
* is faster?
* Revert "is faster?"
This reverts commit 42db542010.
* disable docker again for less load on CI
@@ -314,7 +314,7 @@ class _Device:
   def __init__(self) -> None:
     self._buffers: List[str] = [x.stem[len("ops_"):].upper() for x in (pathlib.Path(__file__).parent/"runtime").iterdir() if x.stem.startswith("ops_")]
     self.DEFAULT: str = functools.reduce(lambda val, ele: ele if getenv(ele) == 1 else val, self._buffers, None) or self._default_device()
-  def canonicalize(self, device:str) -> str: return (device.split(":", 1)[0].upper() + ((":"+device.split(":", 1)[1]) if ':' in device else '')).replace(":0", "")
+  def canonicalize(self, device:Optional[str]) -> str: return (device.split(":", 1)[0].upper() + ((":"+device.split(":", 1)[1]) if ':' in device else '')).replace(":0", "") if device is not None else self.DEFAULT
   def __getitem__(self, x:str) -> Union[Interpreted, Compiled]: return self._get_device(x.split(":")[0].upper())
   @functools.lru_cache(maxsize=None) # this class is a singleton, pylint: disable=method-cache-max-size-none
   def _get_device(self, x:str) -> Union[Interpreted, Compiled]: return [cls for cname, cls in inspect.getmembers(importlib.import_module(f'tinygrad.runtime.ops_{x.lower()}')) if (cname.lower() == x.lower() + "buffer") and x in self._buffers][0]
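For reference, the behavioral change is that canonicalize now accepts None and substitutes the default device, while explicit device strings are normalized exactly as before. A minimal sketch of the expected mapping (the import path for the Device singleton is an assumption about this revision):

from tinygrad.ops import Device  # assumed import path for the Device singleton

# explicit strings are upper-cased and a trailing ":0" is dropped, as before
assert Device.canonicalize("gpu:0") == "GPU"
assert Device.canonicalize("cpu") == "CPU"
# new: None falls back to Device.DEFAULT instead of failing on .split
assert Device.canonicalize(None) == Device.DEFAULT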
17 tinygrad/runtime/ops_fake.py Normal file
@@ -0,0 +1,17 @@
+# used for compilation only speed tests
+import numpy as np
+from tinygrad.helpers import dtypes, prod
+from tinygrad.ops import Compiled
+from tinygrad.runtime.lib import RawBuffer
+
+class RawFakeBuffer(RawBuffer):
+  @classmethod
+  def fromCPU(cls, x:np.ndarray, **kwargs): return cls(prod(x.shape), dtypes.from_np(x.dtype), **kwargs)
+  def toCPU(self): return np.empty(self.size, dtype=self.dtype.np)
+
+class FakeProgram:
+  def __init__(self, name:str, prg:str): pass
+  def __call__(self, global_size, local_size, *args, wait=False): pass
+
+# NOTE: you have to set a codegen to use this
+FakeBuffer = Compiled(RawFakeBuffer, None, FakeProgram)
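Because FakeProgram never launches anything and the Compiled backend is constructed with codegen=None, the FAKE device only becomes useful once a test attaches a real code generator to it; the commit message notes it "uses the real device codegen". A hedged sketch of how a compile-only speed test might wire this up (the .codegen attribute name and the CLANG backend choice are assumptions, not taken from this diff):

import os
os.environ["FAKE"] = "1"  # FAKE=1 makes it Device.DEFAULT, since it is discovered from runtime/ops_fake.py

from tinygrad.ops import Device
from tinygrad.tensor import Tensor

# assumption: the Compiled instance exposes its code generator as .codegen
Device["FAKE"].codegen = Device["CLANG"].codegen  # borrow a real backend's codegen

# realizing a graph now exercises the linearize/codegen/compile paths,
# while FakeProgram turns every kernel launch into a no-op
(Tensor.ones(64, 64) @ Tensor.ones(64, 64)).realize()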
@@ -39,7 +39,7 @@ class Tensor:
   no_grad: ClassVar[bool] = False
   default_type: ClassVar[DType] = dtypes.float32
 
-  def __init__(self, data:Union[int, float, list, tuple, LazyBuffer, np.ndarray], device=Device.DEFAULT, dtype:Optional[DType]=None, requires_grad:Optional[bool]=None):
+  def __init__(self, data:Union[int, float, list, tuple, LazyBuffer, np.ndarray], device:Optional[str]=None, dtype:Optional[DType]=None, requires_grad:Optional[bool]=None):
     assert dtype is None or isinstance(dtype, DType), f"invalid dtype {dtype}"
     device = Device.canonicalize(device)
     # tensors have gradients, buffers do not
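The practical effect is that the default device is now resolved per call through Device.canonicalize rather than being baked into the signature as Device.DEFAULT at import time, and None becomes an accepted value. A small illustrative sketch (import paths assumed for this revision):

from tinygrad.tensor import Tensor
from tinygrad.ops import Device  # assumed location of the Device singleton

t = Tensor([1.0, 2.0, 3.0])                # device=None -> Device.canonicalize(None) -> Device.DEFAULT
assert t.device == Device.DEFAULT

u = Tensor([1.0, 2.0, 3.0], device="cpu")  # explicit strings are still canonicalized: "cpu" -> "CPU"
assert u.device == "CPU"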
@@ -124,7 +124,7 @@ class Tensor:
   # ***** creation llop entrypoint *****
 
   @staticmethod
-  def _loadop(op, sz, device=Device.DEFAULT, dtype:Optional[DType]=None, arg=None, **kwargs):
+  def _loadop(op, sz, device:Optional[str]=None, dtype:Optional[DType]=None, arg=None, **kwargs):
     return Tensor(LazyBuffer.loadop(op, [sz], Tensor.default_type if dtype is None else dtype, Device.canonicalize(device), arg), dtype=dtype, device=device, **kwargs)
 
   @staticmethod