From e35c31c8e599cbcd06fee313274a70d713dac805 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Fri, 17 Nov 2023 20:50:07 -0800 Subject: [PATCH] xid for hip, device in time linearizer (#2348) Co-authored-by: Tiny Box --- tinygrad/features/search.py | 2 +- tinygrad/helpers.py | 2 +- tinygrad/runtime/ops_hip.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tinygrad/features/search.py b/tinygrad/features/search.py index 860c1b7598..842c8c58f8 100644 --- a/tinygrad/features/search.py +++ b/tinygrad/features/search.py @@ -22,7 +22,7 @@ actions += [ # returns time in seconds def time_linearizer(lin:Linearizer, rawbufs:List[RawBuffer], allow_test_size=True, max_global_size=65536, cnt=3, disable_cache=False, clear_l2=False) -> float: - key = {"ast": str(lin.ast), "opts": str(lin.applied_opts), "allow_test_size": allow_test_size, "max_global_size": max_global_size, "clear_l2": clear_l2} + key = {"ast": str(lin.ast), "opts": str(lin.applied_opts), "allow_test_size": allow_test_size, "max_global_size": max_global_size, "clear_l2": clear_l2, "device": Device.DEFAULT} if not disable_cache and CACHELEVEL >= 2 and (val:=diskcache_get("time_linearizer", key)) is not None: return min(val) # Set the midpoint value value for var_vals to optimize shapes. diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py index e7472bda2c..646cddcc89 100644 --- a/tinygrad/helpers.py +++ b/tinygrad/helpers.py @@ -174,7 +174,7 @@ _cache_dir: str = getenv("XDG_CACHE_HOME", os.path.expanduser("~/Library/Caches" CACHEDB: str = getenv("CACHEDB", os.path.abspath(os.path.join(_cache_dir, "tinygrad", "cache.db"))) CACHELEVEL = getenv("CACHELEVEL", 2) -VERSION = 8 +VERSION = 9 _db_connection = None def db_connection(): global _db_connection diff --git a/tinygrad/runtime/ops_hip.py b/tinygrad/runtime/ops_hip.py index f92c62ad08..1258c17785 100644 --- a/tinygrad/runtime/ops_hip.py +++ b/tinygrad/runtime/ops_hip.py @@ -101,5 +101,6 @@ __device__ void vstore_half2(float2 data, size_t offset, half *p) { *(p + offset __device__ void vstore_half4(float4 data, size_t offset, half *p) { *(p + offset*4) = (half)data.x; *(p + offset*4 + 1) = (half)data.y; *(p + offset*4 + 2) = (half)data.z; *(p + offset*4 + 3) = (half)data.w; } """, gid = [f'blockIdx.{chr(120+i)}' for i in range(3)], - lid = [f'threadIdx.{chr(120+i)}' for i in range(3)])) + lid = [f'threadIdx.{chr(120+i)}' for i in range(3)], + xid = [f'(blockIdx.{chr(120+i)}*blockDim.{chr(120+i)}+threadIdx.{chr(120+i)})' for i in range(3)])) HIPBuffer = Compiled(RawHIPBuffer, LinearizerOptions(device="HIP"), renderer, compile_hip, HIPProgram, hip.hipDeviceSynchronize)