mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
* testing new memops
* better debugging
* testing padded conv
* branching with load
* refactoring a bit
* first try
* fixing bugs
* fixing some
* eq
* eq2
* do not use x's
* working
* fixing imm
* getting things working
* refactor
* pow not working
* working except one
* refactor: one store mem
* refactor: global load
* refactor: imm
* refactor: cleaning
* fixing big offsets
* refactor with ci
* try ci
* typo
* another typo
* ubuntu default
* forgot git
* do i need git?
* missing packages
* adding python-dev
* with cache?
* buildx action
* buildx name issue?
* maybe now?
* python3
* newline warning
* maybe now
* i actually need this
* ci should work now
* improved caching
* fixing cache
* maybe now it will cache
* this
* testing cache
* trying again
* load
* missing platform
* caching gha
* testing cache
* full testing
* typo
* now?
* why
* adding checkout back
* bad formatting
* fixing convention issues
* supporting python
* adding CI flag
* testing all
* better comments
* adding debugging
* takes 12x longer
* does it output progress now?
* ignore models for speed
* fixing merge
* excluding conv_transpose2d
* only 2 tests because it is too slow
* another approach
* let's see
* faster duh
* my bad
* T_T
* typo
* sup
* with output?
* comment test
* comment test
* comment test
* :?
* no comment
* with cache
* back to normal
* testing that ci works
* back to passing
* trying again
* does it create another entry
* does it create another entry?
* build local
* hey
* Revert "excluding conv_transpose2d"
This reverts commit cc7348de03.
* does it cache if done before?
* does it cache?
* done
* adding test ops
* bad formatting
* no need for this
* working static mem
* sum 1d
* add ndim
* better reg import
* fix stack
* back to np
* working except for softmax
* 5 failing
* no progress
* remove keystone
* remove keystone
* testops passing
* cleanups
* more cleanup
* typo
* ci
* ci2
* cond import
* ci3
* ci4
* ci4
* ci5
* ci5
* ci6
* alignment
* test all
* correct test
* err read_unmapped
* passing test
* ignore for speed
* ignore for speed
* ci7
* cleanup
* remove docker
* fixing merge
* fixing bugs
* add skipload for const ops
* comments
* First merge to master: Renderer
* fix emulation
* passing all tests arm64
* cleaning
* fix handcoded binary
* cleaning
* fix errs
* fix runtime arg binary
* clean git diff
* fix and clean
* fixing metal test
* cleaning
* fix metal test
* ci ~8 min
* fix pylint and clang
* cache the files in ops_clang
---------
Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
58 lines
2.2 KiB
Python
58 lines
2.2 KiB
Python
# NOTE: this only tests the speed of the LLaMA codegen, it doesn't actually run the net
|
|
import unittest, time
|
|
import numpy as np
|
|
from examples.llama import Transformer, MODEL_PARAMS
|
|
from test.test_net_speed import start_profile, stop_profile
|
|
from tinygrad.tensor import Tensor
|
|
from tinygrad.lazy import Device
|
|
from tinygrad.state import get_state_dict
|
|
from tinygrad.ops import Compiled
|
|
from tinygrad.helpers import dtypes, prod
|
|
from tinygrad.runtime.lib import RawBuffer
|
|
|
|
class FakeProgram:
  """Stand-in for a compiled program: accepts construction and launch arguments and does nothing."""

  def __init__(self, name:str, prg:str, binary:bool):
    # Nothing is compiled or stored; all three arguments are ignored.
    return None

  def __call__(self, global_size, local_size, *bufs, wait=False):
    # A launch is a no-op regardless of sizes, buffers or `wait`; the result is None.
    return None
|
|
|
|
class RawFakeBuffer(RawBuffer):
  """RawBuffer subclass: fromCPU forwards only size and dtype, toCPU hands back an uninitialized array."""

  @classmethod
  def fromCPU(cls, x:np.ndarray, **kwargs):
    """Construct a buffer from the element count and dtype of `x`; this method never reads the values of `x`."""
    numel = prod(x.shape)
    dtype = dtypes.from_np(x.dtype)
    return cls(numel, dtype, **kwargs)

  def toCPU(self):
    """Return `np.empty(self.size)` with dtype `self.dtype.np`; the contents are uninitialized."""
    host_array = np.empty(self.size, dtype=self.dtype.np)
    return host_array
|
|
|
|
class TestLLaMASpeed(unittest.TestCase):
  """Times the Python-side cost of LLaMA forward passes with the device runtime and buffer stubbed out."""

  @unittest.skipIf(not isinstance(Device[Device.DEFAULT], Compiled), "only test for compiled backends")
  def test_llama_compile(self):
    """Build the 7B model and print per-call timings for three runs of ten forward passes.

    The default device's `runtime` and `buffer` are temporarily replaced with
    FakeProgram and RawFakeBuffer. The originals are restored in a `finally`
    clause so that a failure anywhere in the test cannot leave the fakes
    installed for other tests in the same process.
    """
    backup_program = Device[Device.DEFAULT].runtime
    backup_buffer = Device[Device.DEFAULT].buffer
    try:
      Device[Device.DEFAULT].runtime = FakeProgram
      Device[Device.DEFAULT].buffer = RawFakeBuffer

      print("testing llama python run time")
      model = Transformer(**MODEL_PARAMS[1]["7B"]["args"])
      print("built model")
      # assign fake tensors to the values
      for v in get_state_dict(model).values(): v.assign(Tensor.empty(*v.shape, dtype=v.dtype))
      print("assigned empty tensors, doing warmup")

      def run_llama(st, empty_method_cache=True):
        """Time ten model calls and print the mean and per-call milliseconds, labelled with `st`.

        When `empty_method_cache` is true the device's method_cache is cleared
        first, before any timing starts.
        """
        if empty_method_cache: Device[Device.DEFAULT].method_cache.clear()
        tms = [time.perf_counter()]
        for i in range(10):
          model(Tensor([[2]]), i).realize()
          tms.append(time.perf_counter())
        # consecutive timestamp differences, converted from seconds to milliseconds
        timings = [(end-start)*1000 for start, end in zip(tms, tms[1:])]
        print(f"{st:15s} mean runtime: {sum(timings)/len(timings):7.2f}ms, runs: ", ", ".join(f'{x:7.2f}' for x in timings))

      run_llama("codegen")
      # second run keeps whatever the first run left in the method cache
      run_llama("methodcache", False)

      # NOTE(review): start_profile/stop_profile come from test.test_net_speed; presumably a profiler wrapper -- confirm
      pr = start_profile()
      run_llama("profile")
      stop_profile(pr, sort='time', frac=0.1)
    finally:
      # always put the real runtime and buffer back, even if something above raised
      Device[Device.DEFAULT].runtime = backup_program
      Device[Device.DEFAULT].buffer = backup_buffer
|
|
|
|
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()
|