diff --git a/tinygrad/codegen/ast.py b/tinygrad/codegen/ast.py
index 5fdc328221..a1940568a4 100644
--- a/tinygrad/codegen/ast.py
+++ b/tinygrad/codegen/ast.py
@@ -7,8 +7,7 @@ from tinygrad.shape import ShapeTracker, View, strides_for_shape
 
 def get_first_reduce(shapes):
   for i in range(len(shapes[0])):
-    if not all_same([x[i] for x in shapes]):
-      return i
+    if not all_same([x[i] for x in shapes]): return i
   return len(shapes[0])  # off the end
 
 # this will be removed soon anyway
@@ -89,9 +88,7 @@ class ASTKernel:
     self.full_buf_index : int = self.bufs.index(self.earlybufs[0]) if len(self.earlybufs) > 0 else 0
 
   def print(self):
-    buf_count = -1
-    op_count = -1
-    cache = {}
+    buf_count, op_count, cache = -1, -1, {}
     def print_ast(x, name=None):
       nonlocal buf_count, op_count
       if x not in cache:
@@ -114,8 +111,7 @@ class ASTKernel:
   def printbufs(self, prefix="", print_shapetrackers=False):
     print(f"first_reduce: {self.first_reduce} shape_len: {self.shape_len} group_for_reduce: {self.group_for_reduce}")
     if print_shapetrackers:
-      for st in self.sts:
-        print(st)
+      for st in self.sts: print(st)
     for i in range(len(self.sts)):
       print(prefix, self.buftokens[i], f"early:{'T' if i < len(self.bufs) and self.bufs[i] in self.earlybufs else 'F'}", self.sts[i].shape, self.sts[i].views[-1].strides, len(self.sts[i].views), type(self.bufs[i]._buf) if self.bufs[i] is not None else "FAKE")
 
@@ -158,10 +154,8 @@ class ASTKernel:
       # more can merge than this
       mergeable = all(can_merge) and i != self.first_reduce
       for j in range(len(shapes)):
-        if mergeable:
-          rets[j][-1] = (rets[j][-1][0] * shapes[j][i], strides[j][i])
-        else:
-          rets[j].append((shapes[j][i], strides[j][i]))
+        if mergeable: rets[j][-1] = (rets[j][-1][0] * shapes[j][i], strides[j][i])
+        else: rets[j].append((shapes[j][i], strides[j][i]))
 
     for i,x in enumerate(rets): self.sts[i].reshape(tuple(y[0] for y in x))
     self.first_reduce = get_first_reduce([x.shape for x in self.sts])
diff --git a/tinygrad/codegen/gpu.py b/tinygrad/codegen/gpu.py
index cbdc6d06b9..ddb1049c46 100644
--- a/tinygrad/codegen/gpu.py
+++ b/tinygrad/codegen/gpu.py
@@ -322,12 +322,10 @@ class GPUCodegen(ASTKernel):
     function_name = ("re_S" if self.reduceop else "ew_S") + '_'.join([str(x) for x in self.full_shape])
 
     # painfully name the function
-    if prg in GPUCodegen.kernel_name_cache:
-      function_name = GPUCodegen.kernel_name_cache[prg]
+    if prg in GPUCodegen.kernel_name_cache: function_name = GPUCodegen.kernel_name_cache[prg]
     else:
       GPUCodegen.kernel_cnt[function_name] += 1
-      if GPUCodegen.kernel_cnt[function_name]:
-        function_name = f"{function_name}{'_N'+str(GPUCodegen.kernel_cnt[function_name])}"
+      if GPUCodegen.kernel_cnt[function_name]: function_name = f"{function_name}{'_N'+str(GPUCodegen.kernel_cnt[function_name])}"
       GPUCodegen.kernel_name_cache[prg] = function_name
 
     return ASTRunner(function_name, prg.replace("KERNEL_NAME_PLACEHOLDER", function_name), self.bufs_to_delete,
diff --git a/tinygrad/codegen/llvm.py b/tinygrad/codegen/llvm.py
index e1e55312fa..6d8d7fe084 100644
--- a/tinygrad/codegen/llvm.py
+++ b/tinygrad/codegen/llvm.py
@@ -112,8 +112,7 @@ class LLVMCodegen(ASTKernel):
     func.attributes.add('"no-nans-fp-math"="true"')
 
     # construct the structure of the loops
-    loop_entry = [ir.IRBuilder(func.append_basic_block(name="entry"))]
-    loop_exit = []
+    loop_entry, loop_exit = [ir.IRBuilder(func.append_basic_block(name="entry"))], []
     for i,_ in enumerate(full_shape): loop_entry.append(ir.IRBuilder(func.append_basic_block(name=f"loop_{i}")))
     for i,_ in enumerate(full_shape): loop_exit.append(ir.IRBuilder(func.append_basic_block(name=f"loopexit_{len(full_shape)-1-i}")))
     loop_exit.append(ir.IRBuilder(func.append_basic_block(name="exit")))
@@ -174,8 +173,7 @@ class LLVMCodegen(ASTKernel):
     if self.reduceop:
       reduce_input = ast_parse(loop_exit[-1], self.reduceop.src[0], -1)
       phis = [LLVMCodegen.start_for_op[self.reduceop.op]]  # type: ignore
-      if kernel_output_dim > 1:
-        phis = [kernel_output_type(phis * kernel_output_dim)]
+      if kernel_output_dim > 1: phis = [kernel_output_type(phis * kernel_output_dim)]
       for i in range(store_loop+1, len(loop_entry)):
         val = loop_entry[i].phi(kernel_output_type, f"reduce_phi_{i}")
         val.add_incoming(phis[-1], loop_entry[i-1]._block)
diff --git a/tinygrad/graph.py b/tinygrad/graph.py
index 44ff443bfd..b73cd58da7 100644
--- a/tinygrad/graph.py
+++ b/tinygrad/graph.py
@@ -18,10 +18,8 @@ G = nx.DiGraph() if nx is not None else None
 cnts : Dict[OpType, int] = defaultdict(int)
 if GRAPH:
   def save_graph_exit():
-    for k,v in cnts.items():
-      print(k, v)
-    if PRUNEGRAPH:
-      prune_graph()
+    for k,v in cnts.items(): print(k, v)
+    if PRUNEGRAPH: prune_graph()
     print("saving", G)
     nx.drawing.nx_pydot.write_dot(G, f'{GRAPHPATH}.dot')
     # -Gnslimit=100 can make it finish, but you won't like results
@@ -61,8 +59,7 @@ def log_op(ret : DeviceBuffer, ast : LazyOp, show_graph : Optional[bool] = None)
       G.add_edge(nm(x), nm(ret), label=get_sop(op))
       if 'label' not in G.nodes[nm(x)]:
         G.nodes[nm(x)]['label'] = str(x.shape)
-    if nm(ret) not in G.nodes:
-      G.add_node(nm(ret))
+    if nm(ret) not in G.nodes: G.add_node(nm(ret))
 
     G.nodes[nm(ret)]['label'] = str(set(x.shape for x in inp))+"\n"+str(ret.shape) if optype == ReduceOps else str(ret.shape)
     G.nodes[nm(ret)]['fillcolor'] = (top_colors[optype] + ('80' if dashed else str())) if optype in top_colors else "#ffffff"
diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index a82ea06a74..006b376ff5 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -15,5 +15,4 @@ def mnum(i) -> str: return str(i) if i >= 0 else f"m{-i}"
 @functools.lru_cache(maxsize=None)
 def getenv(key, default=0): return type(default)(os.getenv(key, default))
 
-DEBUG = getenv("DEBUG", 0)
-IMAGE = getenv("IMAGE", 0)
+DEBUG, IMAGE = getenv("DEBUG", 0), getenv("IMAGE", 0)
diff --git a/tinygrad/jit.py b/tinygrad/jit.py
index 885e49e762..3cdb757c46 100644
--- a/tinygrad/jit.py
+++ b/tinygrad/jit.py
@@ -8,10 +8,10 @@ from tinygrad.ops import GlobalCounters, CompiledBuffer, RawBuffer
 
 class TinyJit:
   def __init__(self, fxn:Callable):
-    self.fxn = fxn
-    self.cnt = 0
+    self.fxn : Callable = fxn
+    self.cnt : int = 0
     self.jit_cache : List[Tuple[Callable, Any]] = []  # TODO: Any should be List[RawBuffer], but this fails
-    self.ret = None
+    self.ret : Any = None
     self.input_replace : Dict[Tuple[int, int], Union[int, str]]= {}
 
   def __call__(self, *args, **kwargs) -> Any:
diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py
index 27cfc8ef67..b5246ea998 100644
--- a/tinygrad/lazy.py
+++ b/tinygrad/lazy.py
@@ -62,8 +62,7 @@ def _ast_binaryops(self:LazyBuffer) -> LazyOp:
   # reshape all the late ops into the output shape
   # NOTE: these RESHAPEs will return self if they don't change the shape
   for x in real_srcs.keys():
-    if real_srcs[x] is None:
-      real_srcs[x] = x.movement_op(MovementOps.RESHAPE, intermediate_shape)
+    if real_srcs[x] is None: real_srcs[x] = x.movement_op(MovementOps.RESHAPE, intermediate_shape)
   ast = map_buffers(real_srcs, self.op)
   return LazyOp(MovementOps.RESHAPE, (ast, ), self.shape) if intermediate_shape != self.shape else ast
 
@@ -105,17 +104,14 @@ class LazyBuffer:
     # TODO: does children have to be a ref count instead of a set? can a Buffer be a double child?
     self.children : weakref.WeakSet[LazyBuffer] = weakref.WeakSet()
     # NOTE: op should be read only after construction of LazyBuffer
-    for x in get_buffers(op):
-      x.children.add(self)
-    if not LAZY:
-      self.realize()
+    for x in get_buffers(op): x.children.add(self)
+    if not LAZY: self.realize()
 
   def __repr__(self): return f"<LB {self.shape} op:{self.op.op if self.realized is None else 'realized'}>"
 
   # this produces a device buffer
   def realize(self:LazyBuffer, required_device=None) -> DeviceBuffer:
-    if required_device is not None:
-      assert required_device == self.device
+    assert required_device is None or required_device == self.device
     if self.realized is None:
       # get real ops first
       if self.op.op == LoadOps.FROMCPU:
@@ -162,8 +158,7 @@ class LazyBuffer:
   def contiguous(self:LazyBuffer) -> LazyBuffer: return LazyBuffer(self.device, self.shape, LoadOps, LazyOp(LoadOps.CONTIGUOUS, (self,)))
 
   def reduce_op(self:LazyBuffer, op:ReduceOps, new_shape:Tuple[int, ...]) -> LazyBuffer:
-    if self.shape == tuple(new_shape):
-      return self
+    if self.shape == tuple(new_shape): return self
     reduce = list(enumerate(zip(self.shape, new_shape)))
     # move the reduce axes to the end
     x = self.movement_op(MovementOps.PERMUTE, tuple([i for i,(s,n) in reduce if s == n] + [i for i,(s,n) in reduce if s != n]))
@@ -224,16 +219,13 @@ class LazyBuffer:
         out.append(curr)
         if len(new_shape) == len(out) and all(prod(i) == j and len(i) >= 1 for i,j in zip(out, new_shape)):
           return out
-      contraction = get_contraction(self.op.src[0].shape, self.shape)
-      if contraction is not None:
-        numbered = []
-        start = 0
+      if (contraction := get_contraction(self.op.src[0].shape, self.shape)) is not None:
+        numbered, start = [], 0
         for c in contraction:
           numbered.append(list(range(start, start+len(c))))
           start += len(c)
         new_arg = []
-        for p in arg:
-          new_arg += numbered[p]
+        for p in arg: new_arg += numbered[p]
         self.op.src[0].children.discard(self)   # this changes nothing?
         return self.op.src[0].movement_op(MovementOps.PERMUTE, tuple(new_arg)) \
           .movement_op(MovementOps.RESHAPE, ShapeTracker(self.st).movement_op(op, arg).shape)
diff --git a/tinygrad/nn/optim.py b/tinygrad/nn/optim.py
index 3c18d6fdd0..759886c04c 100644
--- a/tinygrad/nn/optim.py
+++ b/tinygrad/nn/optim.py
@@ -6,8 +6,7 @@ class Optimizer:
   def __init__(self, params : List[Tensor]):
     # if it's None, but being put into an optimizer, set it to True
     for x in params:
-      if x.requires_grad is None:
-        x.requires_grad = True
+      if x.requires_grad is None: x.requires_grad = True
 
     self.params : List[Tensor] = [x for x in params if x.requires_grad]
     self.buffers : List[Tensor] = [x for x in params if not x.requires_grad]   # buffers are still realized
@@ -20,8 +19,7 @@ class Optimizer:
       param.grad.assign(param.grad.clip(-(amount**2), (amount**2)))
 
   def zero_grad(self):
-    for param in self.params:
-      param.grad = None
+    for param in self.params: param.grad = None
 
   def realize(self, extra=None):
     # TODO: corealize
@@ -83,9 +81,7 @@ def get_parameters(obj) -> List[Tensor]:
   if isinstance(obj, Tensor):
     parameters.append(obj)
   elif isinstance(obj, (list, tuple)):
-    for x in obj:
-      parameters.extend(get_parameters(x))
+    for x in obj: parameters.extend(get_parameters(x))
   elif hasattr(obj, '__dict__'):
-    for v in obj.__dict__.values():
-      parameters.extend(get_parameters(v))
+    for v in obj.__dict__.values(): parameters.extend(get_parameters(v))
   return parameters
diff --git a/tinygrad/ops.py b/tinygrad/ops.py
index 9df5bfe193..510fb9b13a 100644
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -124,8 +124,7 @@ class ASTRunner:
   def lower(self, bufs) -> List[RawBuffer]: return [x.raw() for i,x in enumerate(bufs) if x is not None and i not in self.bufs_to_delete]
   def __call__(self, bufs):
     if getenv("OPTLOCAL") and self.global_size is not None and self.local_size is None: self.local_size = self.optimize_local_size(bufs)
-    et = self.clprg(self.global_size, self.local_size, *bufs, wait=DEBUG>=2)
-    if et is not None: GlobalCounters.time_sum_s += et
+    if (et := self.clprg(self.global_size, self.local_size, *bufs, wait=DEBUG>=2)) is not None: GlobalCounters.time_sum_s += et
     if DEBUG >= 1:
       print(f"**** {GlobalCounters.kernel_count:4d} {self.name:20s} args {len(bufs):5d}  kernels {str(self.global_size):18s} {str(self.local_size):12s} OPs {self.op_estimate/1e6:7.1f}M/{GlobalCounters.global_ops/1e9:7.2f}G  mem {GlobalCounters.mem_used/1e9:5.2f} GB " +
             (str() if et is None else f"tm {et*1e6:9.2f}us/{GlobalCounters.time_sum_s*1e3:9.2f}ms ({self.op_estimate/(et*1e9):8.2f} GFLOPS)"))
diff --git a/tinygrad/runtime/ops_clang.py b/tinygrad/runtime/ops_clang.py
index c33a6fe5d9..0557111cc2 100644
--- a/tinygrad/runtime/ops_clang.py
+++ b/tinygrad/runtime/ops_clang.py
@@ -35,6 +35,4 @@ class ClangCodegen(GPUCodegen):
   lang = GPULanguage(buffer_suffix="restrict")
 
 class ClangBuffer(CompiledBuffer):
-  raw_buffer_type = RawMallocBuffer
-  codegen_type = ClangCodegen
-  runtime_type = ClangProgram
+  raw_buffer_type, codegen_type, runtime_type = RawMallocBuffer, ClangCodegen, ClangProgram
diff --git a/tinygrad/runtime/ops_cuda.py b/tinygrad/runtime/ops_cuda.py
index 927492b2c9..b2b4c5eda5 100644
--- a/tinygrad/runtime/ops_cuda.py
+++ b/tinygrad/runtime/ops_cuda.py
@@ -40,6 +40,4 @@ class CUDACodegen(GPUCodegen):
     lid = [f'threadIdx.{chr(120+i)}' for i in range(3)])
 
 class CUDABuffer(CompiledBuffer):
-  raw_buffer_type = RawCUDABuffer
-  codegen_type = CUDACodegen
-  runtime_type = CUDAProgram
+  raw_buffer_type, codegen_type, runtime_type = RawCUDABuffer, CUDACodegen, CUDAProgram
diff --git a/tinygrad/runtime/ops_llvm.py b/tinygrad/runtime/ops_llvm.py
index 7d17fa2b3f..2b156c9c77 100644
--- a/tinygrad/runtime/ops_llvm.py
+++ b/tinygrad/runtime/ops_llvm.py
@@ -27,8 +27,7 @@ class LLVM:
     # TODO: this makes compile times so much faster
     if getenv("LLVMOPT"):
       llvm.set_option(str(), '-force-vector-interleave=4')  # this makes sum the same speed as torch, it also doubles the (slow) conv speed
-      if DEBUG >= 4:
-        llvm.set_option(str(), '--debug-only=loop-vectorize')
+      if DEBUG >= 4: llvm.set_option(str(), '--debug-only=loop-vectorize')
       #llvm.set_option(str(), '--debug')
 
       # does this do anything?
@@ -64,6 +63,4 @@ class LLVMProgram:
     if wait: return time.monotonic()-st
 
 class LLVMBuffer(CompiledBuffer):
-  raw_buffer_type = RawMallocBuffer
-  codegen_type = LLVMCodegen
-  runtime_type = LLVMProgram
+  raw_buffer_type, codegen_type, runtime_type = RawMallocBuffer, LLVMCodegen, LLVMProgram
diff --git a/tinygrad/runtime/ops_metal.py b/tinygrad/runtime/ops_metal.py
index e37b46a729..fe1b6fcfff 100644
--- a/tinygrad/runtime/ops_metal.py
+++ b/tinygrad/runtime/ops_metal.py
@@ -84,6 +84,4 @@ class MetalCodegen(GPUCodegen):
     extra_args = ['uint3 gid [[thread_position_in_grid]]', 'uint3 lid [[thread_position_in_threadgroup]]'])
 
 class MetalBuffer(CompiledBuffer):
-  raw_buffer_type = RawMetalBuffer
-  codegen_type = MetalCodegen
-  runtime_type = MetalProgram
\ No newline at end of file
+  raw_buffer_type, codegen_type, runtime_type = RawMetalBuffer, MetalCodegen, MetalProgram
diff --git a/tinygrad/shape/__init__.py b/tinygrad/shape/__init__.py
index 288943082c..75919edda3 100644
--- a/tinygrad/shape/__init__.py
+++ b/tinygrad/shape/__init__.py
@@ -29,8 +29,7 @@ class View:
     ret = [Variable.num(self.offset+offset)]
     acc = 1
     for d,s in self.shape_strides[::-1]:
-      if d != 1 and s != 0:
-        ret.append(((idx//acc)%d)*s)
+      ret.append(((idx//acc)%d)*s)
       acc *= d
     return Variable.sum(ret)
 
@@ -63,8 +62,7 @@ ViewTypes = Union[View, ZeroView]
 @functools.lru_cache(maxsize=None)
 def strides_for_shape(shape:Tuple[int, ...]) -> Tuple[int, ...]:
   strides = [1]
-  for d in shape[::-1][:-1]:
-    strides = [d*strides[0]] + strides
+  for d in shape[::-1][:-1]: strides = [d*strides[0]] + strides
   return tuple(st if s != 1 else 0 for st, s in zip(strides, shape))
 
 @functools.lru_cache(maxsize=None)
@@ -73,8 +71,7 @@ def view_from_shape(shape:Tuple[int, ...]) -> View:
   return View(tuple(shape), strides_for_shape(shape))
 
 def merge_views(vm2:View, vm1:View) -> Optional[View]:
-  new_strides = []
-  new_offset = vm2.expr_node(Variable.num(vm1.offset))
+  new_strides, new_offset = [], vm2.expr_node(Variable.num(vm1.offset))
   assert isinstance(new_offset, NumNode), "new_offset wasn't a number?!?"
   for s,st in zip(vm1.shape, vm1.strides):
     this_dim = View(vm2.shape, vm2.strides).expr_node(Variable('idx', 0, s-1)*st)
@@ -147,8 +144,7 @@ class ShapeTracker:
     assert prod(self.shape) == prod(new_shape), f"can't reshape {self.shape} -> {new_shape}"
 
     view = View(new_shape, strides_for_shape(new_shape))
-    if self.contiguous:
-      self.views[-1] = view   # NOTE: if it's contiguous it can't have an offset
+    if self.contiguous: self.views[-1] = view   # NOTE: if it's contiguous it can't have an offset
     else:
       # NOTE: the last view in self.views is never a ZeroView
       if (merged_view := merge_views(cast(View, self.views[-1]), view)) is not None: self.views[-1] = merged_view
diff --git a/tinygrad/shape/symbolic.py b/tinygrad/shape/symbolic.py
index 4db61ff0bb..b6905b1841 100644
--- a/tinygrad/shape/symbolic.py
+++ b/tinygrad/shape/symbolic.py
@@ -4,7 +4,7 @@ from typing import List, Dict, Callable, Type, Union
 from tinygrad.helpers import partition, all_same
 
 # NOTE: Python has different behavior for negative mod and floor div than c
-# symbolic matches the Python behavior, but the code is outputs is agnostic, and will never have negative numbers in div or mod
+# symbolic matches the Python behavior, but the code output is agnostic, and will never have negative numbers in div or mod
 
 def create_node(typ:Type[Node], *args):
   ret = typ(*args)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 3a77bdb164..b7bd2146be 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -157,8 +157,7 @@ class Tensor:
       visited.add(node)
       if node._ctx:
         for i in node._ctx.parents:
-          if i not in visited:
-            _deepwalk(i, visited, nodes)
+          if i not in visited: _deepwalk(i, visited, nodes)
         nodes.append(node)
       return nodes
     return _deepwalk(self, set(), [])