All LazyOps in the Linearizer (#1905)

* loadop buffer on cpu

* works for GPU

* sort of working

* has bugs

* gpu tests pass

* fix some tests

* fix tensor cores

* fix test linearizer

* fix symbolic

* fix has_variable_shape

* non symbolic size

* disable weird test

* simple cache fix

* fix custom function

* fix kopt

* cleanups

* a bit broken on the assign

* contig check

* only buffer

* need that order

* idx

Author: George Hotz
Date: 2023-09-24 11:50:00 +08:00
Committed by: GitHub
Commit: a5820390db (parent 0f373b8b47)
15 changed files with 151 additions and 138 deletions

@@ -14,17 +14,17 @@ from examples.hlb_cifar10 import SpeedyResNet
 from examples.llama import Transformer as LLaMaTransformer, MODEL_PARAMS as LLAMA_MODEL_PARAMS
 from examples.stable_diffusion import UNetModel
 
-def kopt_search_hook(k, create_k, to_prg, baseline):
+def kopt_search_hook(k, create_k, to_prg, baseline, bufs):
   import nevergrad as ng
-  wanna_output = k.bufs[0].toCPU().copy()
+  wanna_output = bufs[0].toCPU().copy()
   def check_opt(x):
     try:
       k = create_k()
       k.process()
       k.apply_auto_opt(x)
       prg = to_prg(k)
-      first_tm = prg.exec(k.bufs, force_wait=True, optimizing=True)
-      np.testing.assert_allclose(wanna_output, k.bufs[0].toCPU(), atol=1e-4, rtol=1e-4)
+      first_tm = prg.exec(bufs, force_wait=True, optimizing=True)
+      np.testing.assert_allclose(wanna_output, bufs[0].toCPU(), atol=1e-4, rtol=1e-4)
       return first_tm
     except Exception:
       return 10000_000 # 10000 seconds is infinity
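
The hunk above moves the concrete buffers out of the Linearizer: `kopt_search_hook` now receives `bufs` explicitly instead of reading `k.bufs`. A minimal, self-contained sketch of the same check-then-time pattern (hypothetical helper names, plain NumPy, no tinygrad dependency) might look like this:

```python
import time
import numpy as np

def make_check_opt(run_candidate, baseline_output):
  # run_candidate(x) is assumed to return (output_array, elapsed_seconds);
  # baseline_output is the reference result captured before the search starts.
  wanna_output = baseline_output.copy()
  def check_opt(x):
    try:
      out, elapsed = run_candidate(x)
      # reject any candidate whose output drifts from the baseline
      np.testing.assert_allclose(wanna_output, out, atol=1e-4, rtol=1e-4)
      return elapsed
    except Exception:
      return 10000_000  # 10000 seconds stands in for "infinitely slow"
  return check_opt

if __name__ == "__main__":
  ref = np.ones((4, 4), dtype=np.float32)
  def run_candidate(scale):
    start = time.perf_counter()
    out = np.ones((4, 4), dtype=np.float32) * scale
    return out, time.perf_counter() - start
  check_opt = make_check_opt(run_candidate, ref)
  print(check_opt(1.0))  # small elapsed time, output matches the baseline
  print(check_opt(2.0))  # 10000000: mismatched output is priced out
```

The optimizer (nevergrad in the hook) then minimizes `check_opt`, so candidates that crash or produce wrong results are effectively excluded by the large penalty.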