From c7d17c25d9b320e4960e6887f5976e40dd1c902b Mon Sep 17 00:00:00 2001 From: George Hotz Date: Fri, 10 Mar 2023 17:41:19 -0800 Subject: [PATCH] ugh, that's getting ugly --- tinygrad/codegen/gpu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tinygrad/codegen/gpu.py b/tinygrad/codegen/gpu.py index 0f19d6c733..6b22ce6474 100644 --- a/tinygrad/codegen/gpu.py +++ b/tinygrad/codegen/gpu.py @@ -73,7 +73,7 @@ class GPUCodegen(ASTKernel): assert len(self.sts[buf_index].views) == 1, "store has more than one view" # all stores can merge, since they have one view and are valid - should_upcast = self.lang.float4 and self.buftokens[buf_index].can_float4() and (self.bufs[buf_index].dtype != dtypes.float16 and not hasattr(self.bufs[buf_index]._buf, "IMAGE")) + should_upcast = self.lang.float4 and self.buftokens[buf_index].can_float4() and (self.bufs[buf_index].dtype != dtypes.float16 or hasattr(self.bufs[buf_index]._buf, "IMAGE")) to_store = {o:v for o,v in zip(self.buftokens[buf_index].offsets(), value)} did_store = set() @@ -101,7 +101,7 @@ class GPUCodegen(ASTKernel): val = self.bufs[buf_index]._backing[0] assert not math.isnan(val) const = Token(f"({val}f)", Types.FLOAT) - should_upcast = self.lang.float4 and const is None and self.buftokens[buf_index].can_float4() and (self.bufs[buf_index].dtype != dtypes.float16 and not hasattr(self.bufs[buf_index]._buf, "IMAGE")) + should_upcast = self.lang.float4 and const is None and self.buftokens[buf_index].can_float4() and (self.bufs[buf_index] is None or self.bufs[buf_index].dtype != dtypes.float16 or hasattr(self.bufs[buf_index]._buf, "IMAGE")) tokens = [] test_idy = [] for o in self.buftokens[buf_index].offsets():