From 92df52d79afa7f625c1d608b2d6adf92eb981b85 Mon Sep 17 00:00:00 2001
From: nimlgen <138685161+nimlgen@users.noreply.github.com>
Date: Sat, 13 Sep 2025 17:00:11 +0300
Subject: [PATCH] make method_cache account for compiler (#12156)

* make method_cache account for compiler

* sorry
---
 test/external/external_test_speed_llama.py |  6 +++---
 test/test_kernel_cache.py                  |  6 +++---
 test/test_method_cache.py                  | 12 ++++++------
 tinygrad/engine/realize.py                 |  6 +++---
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/test/external/external_test_speed_llama.py b/test/external/external_test_speed_llama.py
index 3d468e6257..30dde43781 100644
--- a/test/external/external_test_speed_llama.py
+++ b/test/external/external_test_speed_llama.py
@@ -20,7 +20,7 @@ class TestLLaMASpeed(unittest.TestCase):
   def test_llama_compile(self):
     backup_program = Device[Device.DEFAULT].runtime
     backup_allocator = Device[Device.DEFAULT].allocator
-    backup_compiler = Device[Device.DEFAULT].compiler
+    backup_compiler = Device[Device.DEFAULT].compiler.compile_cached
     Device[Device.DEFAULT].runtime = FakeProgram
     Device[Device.DEFAULT].allocator = FakeAllocator(Device.default)
 
@@ -44,14 +44,14 @@ class TestLLaMASpeed(unittest.TestCase):
     run_llama("codegen(1)")
 
     # test no compiler use for this
-    Device[Device.DEFAULT].compiler = None
+    Device[Device.DEFAULT].compiler.compile_cached = None
     run_llama("methodcache", False)
     with Profiling(sort='time', frac=0.1, fn="/tmp/llama.prof", ts=5):
       run_llama("profile", False)
 
     Device[Device.DEFAULT].runtime = backup_program
     Device[Device.DEFAULT].allocator = backup_allocator
-    Device[Device.DEFAULT].compiler = backup_compiler
+    Device[Device.DEFAULT].compiler.compile_cached = backup_compiler
 
 if __name__ == '__main__':
   TestLLaMASpeed().test_llama_compile()
diff --git a/test/test_kernel_cache.py b/test/test_kernel_cache.py
index 164b501a41..a4f0f2193a 100644
--- a/test/test_kernel_cache.py
+++ b/test/test_kernel_cache.py
@@ -16,14 +16,14 @@ class TestKernelCache(unittest.TestCase):
     a1 = Tensor.rand(4,4).realize()
     b1 = Tensor.rand(4,4).realize()
 
-    orig_compile_func = Device['CPU'].compiler
-    Device['CPU'].compiler = None # making it not callable
+    orig_compile_func = Device['CPU'].compiler.compile_cached
+    Device['CPU'].compiler.compile_cached = None # making it not callable
 
     try:
       x1 = a1 + b1 + unique_const
      x1.realize() # Same kernel should be from cache.
     finally:
-      Device['CPU'].compiler = orig_compile_func
+      Device['CPU'].compiler.compile_cached = orig_compile_func
 
 if __name__ == "__main__":
   unittest.main()
diff --git a/test/test_method_cache.py b/test/test_method_cache.py
index 497b406925..ce413e7709 100644
--- a/test/test_method_cache.py
+++ b/test/test_method_cache.py
@@ -5,9 +5,9 @@ from tinygrad.nn.state import get_state_dict
 
 class TestMethodCache(unittest.TestCase):
   def setUp(self):
-    self.backup_compiler = Device[Device.DEFAULT].compiler
+    self.backup_compiler = Device[Device.DEFAULT].compiler.compile_cached
   def tearDown(self):
-    Device[Device.DEFAULT].compiler = self.backup_compiler
+    Device[Device.DEFAULT].compiler.compile_cached = self.backup_compiler
 
   def test_simple_methodcache(self):
     a = Tensor([1])
@@ -15,19 +15,19 @@ class TestMethodCache(unittest.TestCase):
     c = Tensor([3])
     d = Tensor([4])
     (a+b).realize()
-    Device[Device.DEFAULT].compiler = None
+    Device[Device.DEFAULT].compiler.compile_cached = None
     (c+d).realize()
 
   def test_nested_methodcache(self):
     a,b,c,d = Tensor([1]), Tensor([2]), Tensor([3]), Tensor([4])
     ((a+b)+(a+b)).realize()
-    Device[Device.DEFAULT].compiler = None
+    Device[Device.DEFAULT].compiler.compile_cached = None
     ((c+d)+(c+d)).realize()
 
   def test_nested_methodcache_swap(self):
     a,b,c,d = Tensor([1]), Tensor([2]), Tensor([3]), Tensor([4])
     ((a+b)+(c+d)).realize()
-    Device[Device.DEFAULT].compiler = None
+    Device[Device.DEFAULT].compiler.compile_cached = None
     ((c+d)+(a+b)).realize()
 
   @unittest.skip("incorrect use of transformer")
@@ -38,7 +38,7 @@ class TestMethodCache(unittest.TestCase):
     # NOTE: you have to do this twice due to the k-v cache
     for i in range(3): model(Tensor([[1,2,3,4]]), Variable("start_pos", 0, 10).bind(i)).realize()
     for i in range(3): model(Tensor([[1,2,3,4]]), Variable("start_pos", 0, 10).bind(i)).realize()
-    Device[Device.DEFAULT].compiler = None
+    Device[Device.DEFAULT].compiler.compile_cached = None
     for i in range(3): model(Tensor([[1,2,3,4]]), Variable("start_pos", 0, 10).bind(i)).realize()
 
 if __name__ == '__main__':
diff --git a/tinygrad/engine/realize.py b/tinygrad/engine/realize.py
index 1246e3c2c4..50474a6284 100644
--- a/tinygrad/engine/realize.py
+++ b/tinygrad/engine/realize.py
@@ -140,13 +140,13 @@ class BufferXfer(BufferCopy):
 
 # **************** method cache ****************
-method_cache: dict[tuple[str, bytes, tuple[int, ...], bool], CompiledRunner] = {}
+method_cache: dict[tuple[str, type, bytes, tuple[int, ...], bool], CompiledRunner] = {}
 def get_runner(device:str, ast:UOp) -> CompiledRunner:
   # TODO: this should be all context relevant to rendering
   context = (BEAM.value, NOOPT.value, DEVECTORIZE.value)
-  ckey = (device, ast.key, context, False)
+  ckey = (device, type(Device[device].compiler), ast.key, context, False)
   if cret:=method_cache.get(ckey): return cret
-  bkey = (device.split(":")[0], ast.key, context, True)
+  bkey = (device.split(":")[0], type(Device[device].compiler), ast.key, context, True)
   if bret:=method_cache.get(bkey):
     method_cache[ckey] = ret = CompiledRunner(replace(bret.p, device=device), bret.lib)
   else:
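
Note (editorial, not part of the patch): the realize.py hunk adds type(Device[device].compiler) to the method_cache key, so a runner compiled under one compiler class can no longer be served after the device ends up with a different compiler; this is presumably also why the tests now stub out compiler.compile_cached instead of replacing the compiler object itself, which would change its type and invalidate the key. Below is a minimal standalone sketch of that keying idea; ClangStub/LLVMStub and the string "runner" values are made up for illustration and are not tinygrad's Compiler or CompiledRunner types.

# standalone sketch, no tinygrad imports; stub classes stand in for real compilers
class ClangStub: pass
class LLVMStub: pass

method_cache: dict[tuple[str, type, bytes, bool], str] = {}

def get_runner(device: str, compiler: object, ast_key: bytes) -> str:
  # mirrors the patched key shape: (device, compiler class, AST hash, batch flag)
  ckey = (device, type(compiler), ast_key, False)
  if (cret := method_cache.get(ckey)) is not None: return cret
  ret = f"compiled {ast_key!r} with {type(compiler).__name__}"
  method_cache[ckey] = ret
  return ret

print(get_runner("CPU", ClangStub(), b"E_4"))  # compiles
print(get_runner("CPU", ClangStub(), b"E_4"))  # cache hit, same compiler class
print(get_runner("CPU", LLVMStub(), b"E_4"))   # recompiles, different compiler class

Without the type component in the key, the third call would return the ClangStub result even though the device now carries a different compiler, which is the stale-cache case the patch closes.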