less outdated abstraction.py (#2917)
removed some old terms and updated types and code pointers
@@ -3,17 +3,17 @@ Welcome to the tinygrad documentation
this file will take you on a whirlwind journey from a Tensor all the way down
-tinygrad has been aggressively refactored in the 2.5 years it's been worked on.
+tinygrad has been aggressively refactored in the 3 years it's been worked on.
what you see here is a refined library (with more refining to go still!)

-the whole tinygrad is ~2300 lines, so while it's readable in an evening or two,
+the whole tinygrad is < 5000 lines, so while it's readable in an evening or two,
this documentation will help with entry points and understanding the abstraction stack
"""

# %%
# == Boilerplate imports for typing ==
from __future__ import annotations
-from typing import Optional, Tuple, Union, Any, Dict, Callable, Type, List, ClassVar
+from typing import Optional, Tuple, Union, Any, Dict, Callable, Type, List
from enum import Enum, auto
from abc import ABC
@@ -91,12 +91,12 @@ class LazyBuffer:
  # this LazyOp describes the computation needed to realize this LazyBuffer
  op: Optional[LazyOp]

-# LazyOp (in tinygrad/ops.py, code 4/10)
+# LazyOp (in tinygrad/ops.py, code 5/10)
# in a tree they form an Abstract Syntax Tree for a single GPU kernel
class LazyOp:
  op: Op  # the type of the compute
-  src: Tuple[Union[LazyOp, LazyBuffer], ...]  # the sources
-  arg: Optional[Any] = None  # and an optional static argument
+  src: Tuple[LazyOp, ...]  # the sources
+  arg: Any = None  # and an optional static argument

# there's currently 26 Ops you have to implement for an accelerator.
class UnaryOps(Enum): EXP2 = auto(); LOG2 = auto(); CAST = auto(); SIN = auto(); SQRT = auto()
@@ -105,12 +105,12 @@ class ReduceOps(Enum): SUM = auto(); MAX = auto()
class MovementOps(Enum): RESHAPE = auto(); PERMUTE = auto(); EXPAND = auto(); PAD = auto(); SHRINK = auto(); STRIDE = auto()
class TernaryOps(Enum): MULACC = auto(); WHERE = auto()
class LoadOps(Enum): EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto()
-# NOTE: if you have a CompiledBuffer(DeviceBuffer)
+# NOTE: if you have a Compiled device
# you do not need to implement the MovementOps
-# as they are handled by the ShapeTracker(in tinygrad/shape/shapetracker.py, code 7/10)
+# as they are handled by the ShapeTracker (in tinygrad/shape/shapetracker.py, code 7/10)
Op = Union[UnaryOps, BinaryOps, ReduceOps, MovementOps, TernaryOps, LoadOps]

-# most of tinygrad/lazy.py is concerned with fusing Ops into LazyOps ASTs that map to GPUKernels
+# most of tinygrad/lazy.py is concerned with fusing Ops into LazyOps ASTs that map to kernels
# it's beyond the scope of this tutorial, but you can read the file if interested
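Since this hunk is mostly type signatures, a tiny hand-built AST may help show what a LazyOp tree looks like. This is a hedged sketch: ConstBuffer, BufferOps.CONST, and ShapeTracker.from_shape are assumptions about this version's helpers, and the store/output side of a real kernel AST is left out.

# a hedged sketch of a hand-built AST for 2.0 + 3.0 (illustrative only, not how lazy.py builds it)
from tinygrad.ops import LazyOp, BinaryOps, BufferOps, ConstBuffer
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.helpers import dtypes

st = ShapeTracker.from_shape((1,))
# leaves of a kernel AST are BufferOps: constants, or loads from numbered input buffers
two = LazyOp(BufferOps.CONST, (), ConstBuffer(2.0, dtypes.float32, st))
three = LazyOp(BufferOps.CONST, (), ConstBuffer(3.0, dtypes.float32, st))
# interior nodes are compute; this node is the whole "ADD" kernel minus the output store
add = LazyOp(BinaryOps.ADD, (two, three))
assert add.op == BinaryOps.ADD and len(add.src) == 2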
# %%
@@ -119,6 +119,7 @@ Op = Union[UnaryOps, BinaryOps, ReduceOps, MovementOps, TernaryOps, LoadOps]
from tinygrad.tensor import Tensor
from tinygrad.ops import LazyOp, BinaryOps, LoadOps
from tinygrad.lazy import LazyBuffer
+from tinygrad.device import Buffer

# the 2+3 from before
result = Tensor([2]) + Tensor([3])
@@ -135,19 +136,20 @@ assert len(lazyop.srcs) == 2
# again, a LazyOp AST is like a GPU kernel. you have to copy the data on the device first
assert lazyop.srcs[0].op == LoadOps.COPY
assert lazyop.srcs[0].srcs[0].device == "CPU"
-assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2.]"
+assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2]"
assert result.lazydata.base.realized is None, "the LazyBuffer is not realized yet"

# now we realize the LazyBuffer
result.realize()
assert result.lazydata.base.realized is not None, "the LazyBuffer is realized!"
-# this brings us nicely to DeviceBuffer, of which the realized ClangBuffer is a subclass
-#assert 'RawMallocBuffer' in str(type(result.lazydata.base.realized))
-# getting ahead of ourselves, but we can copy the DeviceBuffer toCPU
+# this brings us nicely to Buffer
+assert isinstance(result.lazydata.base.realized, Buffer)
+assert result.lazydata.base.realized.device == "CLANG"
+# getting ahead of ourselves, but we can move the Buffer to CPU
assert result.lazydata.base.realized.toCPU()[0] == 5, "when put in numpy with toCPU, it's 5"

# %%
-# == Union[Interpreted, Compiled] (in tinygrad/ops.py, code 5/10) ==
+# == Union[Interpreted, Compiled] (in tinygrad/device.py, code 6/10) ==

# Now you have a choice, you can either write a "Interpreted" backend or "Compiled" backend
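Before the hunks below, a quick hedged check of the idea: the CLANG device used above should be one of the Compiled backends. The Device registry and the Compiled class are assumed to live in tinygrad/device.py in this version.

# a minimal sketch, assuming Device and Compiled are exposed from tinygrad/device.py here
from tinygrad.device import Device, Compiled

# Device["CLANG"] looks up the backend object for the CLANG device;
# a compiler-based backend should be an instance of Compiled
assert isinstance(Device["CLANG"], Compiled)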
@@ -204,7 +206,6 @@ from tinygrad.runtime.ops_clang import ClangProgram, compile_clang
# first we create two numpy buffers containing 2 and 3
-# then we copy the numpy in to RawMallocBuffers
# last, we create an empty output buffer
from tinygrad.helpers import dtypes
input_a, input_b = MallocAllocator.alloc(4), MallocAllocator.alloc(4)
output = MallocAllocator.alloc(4)
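To make the elided rest of this cell concrete, here is a hedged sketch of driving the CLANG runtime by hand; compile_clang, ClangProgram, and the MallocAllocator copyin/as_buffer helpers are assumptions about this era's API and may not match the file exactly.

# a standalone sketch, assuming this era's runtime API (names may have changed since)
import struct
from tinygrad.device import MallocAllocator
from tinygrad.runtime.ops_clang import ClangProgram, compile_clang

# three 4-byte buffers: two inputs and one output, each holding a single float32
a, b, out = MallocAllocator.alloc(4), MallocAllocator.alloc(4), MallocAllocator.alloc(4)
MallocAllocator.copyin(a, memoryview(bytearray(struct.pack("f", 2.0))))
MallocAllocator.copyin(b, memoryview(bytearray(struct.pack("f", 3.0))))

# compile a tiny C kernel that adds the inputs, then run it on the three buffers
lib = compile_clang("void add(float *out, float *a, float *b) { out[0] = a[0] + b[0]; }")
fxn = ClangProgram("add", lib)
fxn(out, a, b)

# read the result back: 2.0 + 3.0 == 5.0
assert MallocAllocator.as_buffer(out).cast("f").tolist()[0] == 5.0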
@@ -236,7 +237,6 @@ class UOp:
  dtype: Optional[DType]
  vin: Tuple[UOp, ...]
  arg: Any
-  num: int # UOps are unique

class Linearizer:
  # create the kernel with the AST
@@ -248,7 +248,7 @@ class Linearizer:
  uops: List[UOp]

from tinygrad.tensor import Tensor
-result = Tensor(2).realize() + Tensor(3).realize()
+result = Tensor(2.0).realize() + Tensor(3.0).realize()

# use the real Linearizer to linearize 2+3
from tinygrad.codegen.linearizer import Linearizer
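The lines elided from this hunk are where the Linearizer actually gets used; the sketch below shows roughly how that looks, under the assumption that this version exposes a schedule() call on LazyBuffer and a linearize() method on the Linearizer (these names are assumptions, not a quote of the file).

# a rough sketch of linearizing 2+3; schedule() and ScheduleItem.ast are assumed API for this era
from tinygrad.tensor import Tensor
from tinygrad.codegen.linearizer import Linearizer

result = Tensor(2.0).realize() + Tensor(3.0).realize()

# ask the lazy graph which kernels need to run; the last item should be the ADD kernel
sched = result.lazydata.schedule()

# build a Linearizer from that kernel's AST and flatten it into a linear list of UOps
linearizer = Linearizer(sched[-1].ast)
linearizer.linearize()
for uop in linearizer.uops: print(uop)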
@@ -261,7 +261,7 @@ for uop in linearizer.uops: print(uop)

# output:
"""
-0 UOps.DEFINE_GLOBAL : ptr.dtypes.float [] ('data0', dtypes.float)
+0 UOps.DEFINE_GLOBAL : ptr.dtypes.float [] data0
1 UOps.CONST : dtypes.float [] 2.0
2 UOps.CONST : dtypes.float [] 3.0
3 UOps.ALU : dtypes.float [1, 2] BinaryOps.ADD
@@ -275,7 +275,7 @@ for uop in linearizer.uops: print(uop)
# here, we have an example where we fetch the generated code from the JIT

from tinygrad.tensor import Tensor
-result = Tensor(2) + Tensor(3)
+result = Tensor(2.0) + Tensor(3.0)

# we have a global cache used by the JIT
# from there, we can see the generated clang code
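For context on where cache_saved in the next hunk comes from: the global cache can be captured around a realize() call. A hedged sketch follows; the CacheCollector import path is an assumption about this version and may differ.

# a hedged sketch: the CacheCollector module path is an assumption for this version
from tinygrad.tensor import Tensor
from tinygrad.features.jit import CacheCollector

result = Tensor(2.0) + Tensor(3.0)

# record every compiled program that realizing the result launches
CacheCollector.start()
result.realize()
cache_saved = CacheCollector.finish()

# a single ADD kernel should have run; its C source lives on the saved program object
assert len(cache_saved) == 1
print(cache_saved[0].prg.prg)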
@@ -290,7 +290,6 @@ assert len(cache_saved) == 1
# print the C Program :)
print(cache_saved[0].prg.prg)

# after some formatting (the compiler doesn't care)
# NOTE: the 2 and 3 are constant folded
"""
void E_n2(float* restrict data0) {