diff --git a/tinygrad/codegen/linearizer.py b/tinygrad/codegen/linearizer.py
index 3cc08a012b..26725cd77e 100644
--- a/tinygrad/codegen/linearizer.py
+++ b/tinygrad/codegen/linearizer.py
@@ -195,7 +195,7 @@ class Linearizer(Kernel):
       if isinstance(buf, MemBuffer):
         self.buf_uops[i] = self.uops.add(UOps.DEFINE_GLOBAL,
                                          buf.dtype if isinstance(buf.dtype, ImageDType) else PtrDType(buf.dtype), (),
-                                         (buf.idx, f"data{buf.idx}", i == 0))
+                                         (buf.idx, f"data{buf.idx}", buf.idx == 0))
     # add var vals
     for i,var in enumerate(self.ast.vars()):
       assert var.expr is not None
@@ -327,7 +327,8 @@ class Linearizer(Kernel):
       assert not locals_to_store, "storing locals isn't supported here"
 
       # load earlybufs
-      loaded_buffers.update({b:self.global_load(self.bufs.index(self.local_alias[i]) if i in self.local_alias else i, global_idxs+local_idxs+reduce_idxs+full_upcast_idxs) for i,b in enumerate(self.bufs[1:], start=1) if b in self.earlybufs})  # noqa: E501
+      loaded_buffers.update({b:self.global_load(self.bufs.index(self.local_alias[i]) if i in self.local_alias else i,
+        global_idxs+local_idxs+reduce_idxs+full_upcast_idxs) for i,b in enumerate(self.bufs) if b in self.earlybufs})
 
       # run early AST (with reduce)
       self.ast_parse(self.reduceop, acc, self.acc_offsets(self.full_buf_index), loaded_buffers, do_reduce=True, loop_ctx=loop_ctx)
@@ -382,7 +383,8 @@ class Linearizer(Kernel):
       local_idxs = local_idxs[:self.local_dims] + [NumNode(0) for i in range(self.group_for_reduces)]
 
     # load latebufs
-    loaded_buffers.update({b:self.global_load(i, global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs) for i,b in enumerate(self.bufs) if b not in self.earlybufs and i != 0 and b.__class__ is not LocalBuffer})  # noqa: E501
+    loaded_buffers.update({b:self.global_load(i, global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs) \
+      for i,b in enumerate(self.bufs) if b not in self.earlybufs and b.__class__ is not LocalBuffer})
 
     # run late AST (without the store)
     val = self.ast_parse(self.ast.src[0], acc, None, loaded_buffers)
diff --git a/tinygrad/codegen/uops.py b/tinygrad/codegen/uops.py
index 3d0dd3e280..dbdd091c41 100644
--- a/tinygrad/codegen/uops.py
+++ b/tinygrad/codegen/uops.py
@@ -19,7 +19,7 @@ class UOps(Enum):
 @dataclass(eq=False)
 class UOp:
   uop: UOps
-  dtype: Optional[DType]
+  dtype: Optional[DType] = None
   vin: Tuple[UOp, ...] = tuple()
   arg: Any = None
   def __repr__(self):
diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py
index 1c340bed8a..0084817777 100644
--- a/tinygrad/lazy.py
+++ b/tinygrad/lazy.py
@@ -50,8 +50,9 @@ class LazyBuffer:
 
   @staticmethod
   def loadop(op, shape:Tuple[sint,...], dtype:DType, device:str, arg=None,
-             src:Optional[LazyBuffer]=None, enable_cache=False, _buf:Optional[Buffer]=None) -> LazyBuffer:
-    ret = create_lazybuffer(device, ShapeTracker.from_shape(shape), dtype, op, arg, (src,) if src is not None else (), enable_cache=enable_cache)
+             src:Tuple[LazyBuffer, ...]=(), enable_cache=False, _buf:Optional[Buffer]=None) -> LazyBuffer:
+    assert isinstance(src, tuple)
+    ret = create_lazybuffer(device, ShapeTracker.from_shape(shape), dtype, op, arg, src, enable_cache=enable_cache)
     if _buf is not None: ret.realized = _buf
     return ret
 
@@ -82,8 +83,8 @@ class LazyBuffer:
     if self.device.startswith("EXT") or self.device.startswith("DISK"):
       # DISK/EXT don't sync
       return create_lazybuffer(device, ShapeTracker.from_shape(self.shape), self.dtype, LoadOps.COPY, None, (self,), enable_cache=False)
-    sync = LazyBuffer.loadop(LoadOps.SYNC, (sync_size,), dtypes.uint32, self.device, src=self, enable_cache=True)
-    wait = LazyBuffer.loadop(LoadOps.WAIT, (0,), dtypes.uint32, device, src=sync, enable_cache=True)
+    sync = LazyBuffer.loadop(LoadOps.SYNC, (sync_size,), dtypes.uint32, self.device, src=(self,), enable_cache=True)
+    wait = LazyBuffer.loadop(LoadOps.WAIT, (0,), dtypes.uint32, device, src=(sync,), enable_cache=True)
     return create_lazybuffer(device, ShapeTracker.from_shape(self.shape), self.dtype, LoadOps.COPY, None, (self, wait), enable_cache=False)
 
   def copy_to_device(self, device:str) -> LazyBuffer:
diff --git a/tinygrad/renderer/cstyle.py b/tinygrad/renderer/cstyle.py
index c5e58941b9..f863a48391 100644
--- a/tinygrad/renderer/cstyle.py
+++ b/tinygrad/renderer/cstyle.py
@@ -118,9 +118,8 @@ def uops_to_cstyle(lang:CStyleLanguage, function_name:str, uops:UOpGraph) -> str
       kk("}")
     elif uop is UOps.STORE:
       assert vin[0].dtype is not None and vin[2].dtype is not None
-      if len(vin) > 3: kk(f"if ({r[vin[3]]}) {{")
-      kk(lang.render_store(r[vin[0]], vin[0].dtype, r[vin[2]], vin[2].dtype, strip_parens(r[vin[1]]), vin[0].uop is UOps.DEFINE_LOCAL))
-      if len(vin) > 3: kk("}")
+      rendered_store = lang.render_store(r[vin[0]], vin[0].dtype, r[vin[2]], vin[2].dtype, strip_parens(r[vin[1]]), vin[0].uop is UOps.DEFINE_LOCAL)
+      kk(f"if ({r[vin[3]]}) {{ {rendered_store} }}" if len(vin) > 3 else rendered_store)
     else:
       assert dtype is not None, f"None dtype for uop {uop}"
       if uop is UOps.LOOP:
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index d39ef74185..c84b29541a 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -37,7 +37,7 @@ class Function:
 
 import tinygrad.mlops as mlops
 
-def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str, ...]], arg=None, src:Optional[LazyBuffer]=None):
+def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str, ...]], arg=None, src:Tuple[LazyBuffer, ...]=()):
   if isinstance(device, str): return LazyBuffer.loadop(op, shape, dtype, device, arg, src)
   return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None)
 