less outdated abstraction.py (#2917)

removed some old terms and updated types and code pointers
Author: chenyu
Date: 2023-12-22 15:31:02 -05:00
Committed by: GitHub
Parent: 50927defad
Commit: 3ba591c3fd


@@ -3,17 +3,17 @@ Welcome to the tinygrad documentation
=================
this file will take you on a whirlwind journey from a Tensor all the way down
-tinygrad has been aggressively refactored in the 2.5 years it's been worked on.
+tinygrad has been aggressively refactored in the 3 years it's been worked on.
what you see here is a refined library (with more refining to go still!)
-the whole tinygrad is ~2300 lines, so while it's readable in an evening or two,
+the whole tinygrad is < 5000 lines, so while it's readable in an evening or two,
this documentation will help with entry points and understanding the abstraction stack
"""
# %%
# == Boilerplate imports for typing ==
from __future__ import annotations
-from typing import Optional, Tuple, Union, Any, Dict, Callable, Type, List, ClassVar
+from typing import Optional, Tuple, Union, Any, Dict, Callable, Type, List
from enum import Enum, auto
from abc import ABC
@@ -91,12 +91,12 @@ class LazyBuffer:
# this LazyOp describes the computation needed to realize this LazyBuffer
op: Optional[LazyOp]
-# LazyOp (in tinygrad/ops.py, code 4/10)
+# LazyOp (in tinygrad/ops.py, code 5/10)
# in a tree they form an Abstract Syntax Tree for a single GPU kernel
class LazyOp:
op: Op # the type of the compute
-src: Tuple[Union[LazyOp, LazyBuffer], ...] # the sources
-arg: Optional[Any] = None # and an optional static argument
+src: Tuple[LazyOp, ...] # the sources
+arg: Any = None # and an optional static argument
# there are currently 26 Ops you have to implement for an accelerator.
class UnaryOps(Enum): EXP2 = auto(); LOG2 = auto(); CAST = auto(); SIN = auto(); SQRT = auto()
@@ -105,12 +105,12 @@ class ReduceOps(Enum): SUM = auto(); MAX = auto()
class MovementOps(Enum): RESHAPE = auto(); PERMUTE = auto(); EXPAND = auto(); PAD = auto(); SHRINK = auto(); STRIDE = auto()
class TernaryOps(Enum): MULACC = auto(); WHERE = auto()
class LoadOps(Enum): EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto()
-# NOTE: if you have a CompiledBuffer(DeviceBuffer)
+# NOTE: if you have a Compiled device
# you do not need to implement the MovementOps
-# as they are handled by the ShapeTracker(in tinygrad/shape/shapetracker.py, code 7/10)
+# as they are handled by the ShapeTracker (in tinygrad/shape/shapetracker.py, code 7/10)
Op = Union[UnaryOps, BinaryOps, ReduceOps, MovementOps, TernaryOps, LoadOps]
-# most of tinygrad/lazy.py is concerned with fusing Ops into LazyOps ASTs that map to GPUKernels
+# most of tinygrad/lazy.py is concerned with fusing Ops into LazyOps ASTs that map to kernels
# it's beyond the scope of this tutorial, but you can read the file if interested
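# (editor's aside, not from tinygrad) to make the AST idea concrete, here is a
# toy sketch of a LazyOp-like tree plus a numpy walker for two of the Ops above.
# MiniOp and numpy_interpret are made-up names for illustration only.
import numpy as np
from dataclasses import dataclass

@dataclass(frozen=True)
class MiniOp:                       # stand-in for LazyOp: an op type, sources, optional arg
  op: str
  src: Tuple["MiniOp", ...] = ()
  arg: Any = None

def numpy_interpret(node: MiniOp) -> np.ndarray:
  if node.op == "CONST": return np.array(node.arg, dtype=np.float32)
  srcs = [numpy_interpret(s) for s in node.src]
  if node.op == "ADD": return srcs[0] + srcs[1]
  if node.op == "EXP2": return np.exp2(srcs[0])
  raise NotImplementedError(node.op)

# the tree EXP2(ADD(2, 3)) evaluates to 2**5 == 32
toy_ast = MiniOp("EXP2", (MiniOp("ADD", (MiniOp("CONST", arg=2.0), MiniOp("CONST", arg=3.0))),))
assert numpy_interpret(toy_ast) == 32.0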
# %%
@@ -119,6 +119,7 @@ Op = Union[UnaryOps, BinaryOps, ReduceOps, MovementOps, TernaryOps, LoadOps]
from tinygrad.tensor import Tensor
from tinygrad.ops import LazyOp, BinaryOps, LoadOps
from tinygrad.lazy import LazyBuffer
+from tinygrad.device import Buffer
# the 2+3 from before
result = Tensor([2]) + Tensor([3])
@@ -135,19 +136,20 @@ assert len(lazyop.srcs) == 2
# again, a LazyOp AST is like a GPU kernel. you have to copy the data onto the device first
assert lazyop.srcs[0].op == LoadOps.COPY
assert lazyop.srcs[0].srcs[0].device == "CPU"
-assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2.]"
+assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2]"
assert result.lazydata.base.realized is None, "the LazyBuffer is not realized yet"
# now we realize the LazyBuffer
result.realize()
assert result.lazydata.base.realized is not None, "the LazyBuffer is realized!"
-# this brings us nicely to DeviceBuffer, of which the realized ClangBuffer is a subclass
-#assert 'RawMallocBuffer' in str(type(result.lazydata.base.realized))
-# getting ahead of ourselves, but we can copy the DeviceBuffer toCPU
+# this brings us nicely to Buffer
+assert isinstance(result.lazydata.base.realized, Buffer)
+assert result.lazydata.base.realized.device == "CLANG"
+# getting ahead of ourselves, but we can move the Buffer to CPU
assert result.lazydata.base.realized.toCPU()[0] == 5, "when put in numpy with toCPU, it's 5"
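# (editor's aside) at the user level you normally don't call toCPU yourself;
# Tensor.numpy() does the realize + copy for you. a quick sanity check:
import numpy as np
assert (result.numpy() == np.array([5.], dtype=np.float32)).all()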
# %%
-# == Union[Interpreted, Compiled] (in tinygrad/ops.py, code 5/10) ==
+# == Union[Interpreted, Compiled] (in tinygrad/device.py, code 6/10) ==
# Now you have a choice: you can either write an "Interpreted" backend or a "Compiled" backend
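# (editor's aside) "Interpreted" means walking the AST and executing each Op
# directly (like the toy numpy_interpret sketch earlier); "Compiled" means
# rendering the AST to source once and running the compiled kernel. reusing the
# made-up MiniOp/toy_ast from that earlier sketch, a toy C renderer might look like:
def to_c_expr(node: MiniOp) -> str:
  if node.op == "CONST": return f"{node.arg}f"
  if node.op == "ADD": return f"({to_c_expr(node.src[0])}+{to_c_expr(node.src[1])})"
  if node.op == "EXP2": return f"exp2({to_c_expr(node.src[0])})"
  raise NotImplementedError(node.op)

# prints: void toy_kernel(float* data0) { data0[0] = exp2((2.0f+3.0f)); }
print(f"void toy_kernel(float* data0) {{ data0[0] = {to_c_expr(toy_ast)}; }}")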
@@ -204,7 +206,6 @@ from tinygrad.runtime.ops_clang import ClangProgram, compile_clang
# first we create two numpy buffers containing 2 and 3
# then we copy the numpy data into the allocated buffers
# last, we create an empty output buffer
-from tinygrad.helpers import dtypes
input_a, input_b = MallocAllocator.alloc(4), MallocAllocator.alloc(4)
output = MallocAllocator.alloc(4)
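# (editor's aside) a hedged sketch of how this example might continue; the
# copyin/copyout and ClangProgram signatures below are from this era of
# tinygrad and should be double-checked against tinygrad/device.py
import numpy as np
MallocAllocator.copyin(input_a, np.array([2], dtype=np.float32).data.cast("B"))
MallocAllocator.copyin(input_b, np.array([3], dtype=np.float32).data.cast("B"))
# compile a tiny C kernel, then run it on the three raw buffers
prg = ClangProgram("add", compile_clang("void add(float *out, float *a, float *b) { out[0] = a[0] + b[0]; }"))
prg(output, input_a, input_b)
# copy the result back out: 2+3 is still 5
out = np.empty(1, dtype=np.float32)
MallocAllocator.copyout(out.data.cast("B"), output)
assert out[0] == 5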
@@ -236,7 +237,6 @@ class UOp:
dtype: Optional[DType]
vin: Tuple[UOp, ...]
arg: Any
-num: int # UOps are unique
class Linearizer:
# create the kernel with the AST
@@ -248,7 +248,7 @@ class Linearizer:
uops: List[UOp]
from tinygrad.tensor import Tensor
-result = Tensor(2).realize() + Tensor(3).realize()
+result = Tensor(2.0).realize() + Tensor(3.0).realize()
# use the real Linearizer to linearize 2+3
from tinygrad.codegen.linearizer import Linearizer
@@ -261,7 +261,7 @@ for uop in linearizer.uops: print(uop)
# output:
"""
-0 UOps.DEFINE_GLOBAL : ptr.dtypes.float [] ('data0', dtypes.float)
+0 UOps.DEFINE_GLOBAL : ptr.dtypes.float [] data0
1 UOps.CONST : dtypes.float [] 2.0
2 UOps.CONST : dtypes.float [] 3.0
3 UOps.ALU : dtypes.float [1, 2] BinaryOps.ADD
@@ -275,7 +275,7 @@ for uop in linearizer.uops: print(uop)
# here, we have an example where we fetch the generated code from the JIT
from tinygrad.tensor import Tensor
-result = Tensor(2) + Tensor(3)
+result = Tensor(2.0) + Tensor(3.0)
# we have a global cache used by the JIT
# from there, we can see the generated clang code
@@ -290,7 +290,6 @@ assert len(cache_saved) == 1
# print the C Program :)
print(cache_saved[0].prg.prg)
# after some formatting (the compiler doesn't care)
-# NOTE: the 2 and 3 are constant folded
"""
void E_n2(float* restrict data0) {