All LazyOps in the Linearizer (#1905)

* loadop buffer on cpu

* works for GPU

* sort of working

* has bugs

* gpu tests pass

* fix some tests

* fix tensor cores

* fix test linearizer

* fix symbolic

* fix has_variable_shape

* non symbolic size

* disable weird test

* simple cache fix

* fix custom function

* fix kopt

* cleanups

* a bit broken on the assign

* contig check

* only buffer

* need that order

* idx

Author: George Hotz
Date: 2023-09-24 11:50:00 +08:00
Committed by: GitHub
Commit: a5820390db (parent 0f373b8b47)
15 changed files with 151 additions and 138 deletions

@@ -14,17 +14,17 @@ from examples.hlb_cifar10 import SpeedyResNet
 from examples.llama import Transformer as LLaMaTransformer, MODEL_PARAMS as LLAMA_MODEL_PARAMS
 from examples.stable_diffusion import UNetModel
 
-def kopt_search_hook(k, create_k, to_prg, baseline):
+def kopt_search_hook(k, create_k, to_prg, baseline, bufs):
   import nevergrad as ng
-  wanna_output = k.bufs[0].toCPU().copy()
+  wanna_output = bufs[0].toCPU().copy()
   def check_opt(x):
     try:
       k = create_k()
       k.process()
       k.apply_auto_opt(x)
       prg = to_prg(k)
-      first_tm = prg.exec(k.bufs, force_wait=True, optimizing=True)
-      np.testing.assert_allclose(wanna_output, k.bufs[0].toCPU(), atol=1e-4, rtol=1e-4)
+      first_tm = prg.exec(bufs, force_wait=True, optimizing=True)
+      np.testing.assert_allclose(wanna_output, bufs[0].toCPU(), atol=1e-4, rtol=1e-4)
       return first_tm
     except Exception:
       return 10000_000 # 10000 seconds is infinity
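
The hunk above moves the concrete buffers out of the Linearizer: `kopt_search_hook` now receives `bufs` explicitly instead of reading `k.bufs`. A minimal, self-contained sketch of the same check-then-time pattern (hypothetical helper names, plain NumPy, no tinygrad dependency) might look like this:

```python
import time
import numpy as np

def make_check_opt(run_candidate, baseline_output):
  # run_candidate(x) is assumed to return (output_array, elapsed_seconds);
  # baseline_output is the reference result captured before the search starts.
  wanna_output = baseline_output.copy()
  def check_opt(x):
    try:
      out, elapsed = run_candidate(x)
      # reject any candidate whose output drifts from the baseline
      np.testing.assert_allclose(wanna_output, out, atol=1e-4, rtol=1e-4)
      return elapsed
    except Exception:
      return 10000_000  # 10000 seconds stands in for "infinitely slow"
  return check_opt

if __name__ == "__main__":
  ref = np.ones((4, 4), dtype=np.float32)
  def run_candidate(scale):
    start = time.perf_counter()
    out = np.ones((4, 4), dtype=np.float32) * scale
    return out, time.perf_counter() - start
  check_opt = make_check_opt(run_candidate, ref)
  print(check_opt(1.0))  # small elapsed time, output matches the baseline
  print(check_opt(2.0))  # 10000000: mismatched output is priced out
```

The optimizer (nevergrad in the hook) then minimizes `check_opt`, so candidates that crash or produce wrong results are effectively excluded by the large penalty.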