diff --git a/tinygrad/llops/ops_gpu.py b/tinygrad/llops/ops_gpu.py index 9da7730d1b..97b5f2d591 100644 --- a/tinygrad/llops/ops_gpu.py +++ b/tinygrad/llops/ops_gpu.py @@ -231,7 +231,7 @@ class CLASTKernel(ASTKernel): def codegen(self) -> Callable: self.process() self.upcast_in_mid_reduce = False - if not KOPT: self.hand_coded_optimizations() + if not KOPT or IMAGE == 2: self.hand_coded_optimizations() # add a local buffer for multistage reduce if len(self.group_for_reduce):