* more work on docs

* CompilerOptions is dataclass
This commit is contained in:
George Hotz
2024-04-24 06:52:42 +04:00
committed by GitHub
parent 6637ecc5fe
commit 60e3aa5cb1
6 changed files with 35 additions and 13 deletions

View File

@@ -39,8 +39,7 @@ optim._step() # this will step the optimizer without running realize
# l1.lazydata and l2.lazydata define a computation graph
from tinygrad.ops import ScheduleItem
from tinygrad.engine.schedule import create_schedule
schedule: List[ScheduleItem] = create_schedule([l1.lazydata, l2.lazydata])
schedule: List[ScheduleItem] = Tensor.schedule(l1, l2)
print(f"The schedule contains {len(schedule)} items.")
for si in schedule: print(str(si)[:80])

View File

@@ -1,21 +1,40 @@
The tinygrad framework has four pieces:
* a PyTorch-like <b>frontend</b>.
* a <b>scheduler</b> which breaks the compute into kernels.
* a <b>lowering</b> engine which converts ASTs into code that can run on the accelerator.
* an <b>execution</b> engine which can run that code.
## Frontend
Everything in [Tensor](tensor.md) is syntactic sugar around [function.py](function.md), where the forwards and backwards passes are implemented for the different ops. That goes on to construct a graph of
Everything in [Tensor](tensor.md) is syntactic sugar around [function.py](function.md), where the forwards and backwards passes are implemented for the different mlops. There are about 25 of them, implemented using about 20 basic ops. Those basic ops go on to construct a graph of:
::: tinygrad.lazy.LazyBuffer
options:
show_source: false
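
A small sketch of that laziness from the frontend side: building on a `Tensor` only grows this graph, and nothing runs until it is realized.

```python
from tinygrad import Tensor

a = Tensor([1.0, 2.0, 3.0]) + 2  # builds a LazyBuffer graph; no kernel has run
print(a.lazydata)                # the lazy computation graph behind the Tensor
a.realize()                      # now the compute actually happens
```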
## Lowering
The `LazyBuffer` graph specifies the compute in terms of low level tinygrad ops. Not all LazyBuffers will actually be realized. There are two types of LazyBuffer, base and view: a base realizes compute into a contiguous buffer, and a view is a view of a base (specified by a ShapeTracker). Inputs to a base can be either bases or views; the input to a view can only be a single base.
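
A sketch of the base/view split (assuming `LazyBuffer` exposes its ShapeTracker as `.st`):

```python
from tinygrad import Tensor

a = Tensor.rand(2, 3).realize()  # a's lazydata is a base: compute into a contiguous buffer
b = a.permute(1, 0)              # a movement op: b's lazydata is a view of that base
print(b.lazydata.st)             # the ShapeTracker specifying the view (assumed field name)
```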
The [scheduler](/tinygrad/engine/schedule.py) converts the graph of LazyBuffers into a list of `ScheduleItem`. `ast` specifies what compute to run, and `bufs` specifies what buffers to run it on.
## Scheduling
The [scheduler](/tinygrad/engine/schedule.py) converts the graph of LazyBuffers into a list of `ScheduleItem`. One `ScheduleItem` is one kernel on the GPU, and the scheduler is responsible for breaking the large compute graph into subgraphs that can fit in a kernel. `ast` specifies what compute to run, and `bufs` specifies what buffers to run it on.
::: tinygrad.ops.ScheduleItem
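
A sketch of inspecting a schedule through the `ast` and `bufs` fields named above:

```python
from tinygrad import Tensor

out = (Tensor.rand(8, 8) @ Tensor.rand(8, 8)).relu()
for si in out.schedule():                  # one ScheduleItem per kernel
    print(len(si.bufs), str(si.ast)[:60])  # the buffers it touches and a peek at its AST
```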
## Lowering
The code in [realize](/tinygrad/engine/realize.py) lowers a `ScheduleItem` to an `ExecItem` with
::: tinygrad.engine.realize.lower_schedule
There's a ton of complexity hidden behind this; see the `codegen/` directory.
First we lower the AST to UOps, a linear list of the compute to be run. This is where the BEAM search happens. The UOps can be changed by `CompilerOptions`.
::: tinygrad.device.CompilerOptions
Then we render the UOps into code, then we compile the code to binary.
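
A sketch of constructing one by hand, with the limits borrowed from the CUDA backend further down this commit:

```python
from tinygrad.device import CompilerOptions

# per-device limits like these steer how the UOps are generated
opts = CompilerOptions("CUDA", global_max=[65535, 65535, 2147483647],
                       local_max=[64, 1024, 1024], shared_max=49152)
```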
## Execution
Lowering creates an `ExecItem`, which has a `run` method.
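
A sketch of the full lowering-to-execution loop, assuming `lower_schedule` yields `ExecItem`s one at a time:

```python
from tinygrad import Tensor
from tinygrad.engine.realize import lower_schedule

sched = (Tensor.rand(4) + 1).schedule()  # list of ScheduleItem, as above
for ei in lower_schedule(sched):         # each ExecItem wraps one compiled program
    ei.run()                             # dispatch it on the device
```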

View File

@@ -1,6 +1,4 @@
Welcome to the docs for tinygrad. This page is for users of the tinygrad library. We also have [developer docs](developer.md)
tinygrad is not 1.0 yet, but it will be soon. The API has been pretty stable for a while.
Welcome to the docs for tinygrad. This page is for users of the tinygrad library. tinygrad is not 1.0 yet, but it will be soon. The API has been pretty stable for a while.
While you can `pip install tinygrad`, we encourage you to install from source:
@@ -10,6 +8,8 @@ cd tinygrad
python3 -m pip install -e .
```
We also have [developer docs](developer.md), and Di Zhu has created a [bunch of tutorials](https://mesozoic-egg.github.io/tinygrad-notes/) to help understand how tinygrad works.
## tinygrad Usage
The main class you will interact with is [Tensor](tensor.md). It functions very similarly to PyTorch, but has a bit more of a functional style. tinygrad supports [many datatypes](dtypes.md). All operations in tinygrad are lazy, meaning they won't do anything until you realize them.
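
A minimal sketch of that laziness, using only the public `Tensor` API:

```python
from tinygrad import Tensor

a = Tensor([1, 2, 3])
b = a * 2          # lazy: records the op, no kernel runs
print(b.numpy())   # realizes the graph and copies the result out
```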

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
from collections import defaultdict
from typing import TYPE_CHECKING, Any, List, Optional, Dict, Tuple, ClassVar, NamedTuple, cast
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, List, Optional, Dict, Tuple, ClassVar, cast
import importlib, inspect, functools, pathlib, time, ctypes, os
from tinygrad.helpers import prod, getenv, colored, all_int, to_function_name, from_mv, flat_mv, diskcache_get, diskcache_put, DEBUG, BEAM, NOOPT
from tinygrad.shape.symbolic import Variable, sym_infer, sint
@@ -114,7 +115,8 @@ MallocAllocator = _MallocAllocator()
# **************** for Compiled Devices ****************
class CompilerOptions(NamedTuple):
@dataclass(frozen=True)
class CompilerOptions:
device: str = ""
suffix: str = ""
# TODO: make this generic with a list of supported types
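
A frozen dataclass has no NamedTuple-style `_replace`, which is why the backend hunks below switch to `dataclasses.replace`. A minimal sketch of the difference (field set abbreviated):

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class CompilerOptions:
    device: str = ""
    suffix: str = ""
    has_tensor_cores: bool = False  # one of the fields the backends override

opts = CompilerOptions("CUDA")
opts = replace(opts, has_tensor_cores=True)  # dataclasses.replace, not NamedTuple._replace
```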

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import subprocess, hashlib, tempfile, ctypes, ctypes.util, functools, re
from pathlib import Path
from dataclasses import replace
from typing import Tuple, Optional, List
import tinygrad.runtime.autogen.cuda as cuda
from tinygrad.helpers import DEBUG, getenv, from_mv, to_char_p_p, init_c_var, init_c_struct_t, colored, cpu_time_execution
@@ -57,7 +58,7 @@ class PTXCompiler(Compiler):
def __init__(self, arch:str):
self.arch = arch
self.version = "7.8" if arch >= "sm_89" else "7.5"
PTXCompiler.compiler_opts = PTXCompiler.compiler_opts._replace(has_tensor_cores=int(arch[3:]) >= 80)
PTXCompiler.compiler_opts = replace(PTXCompiler.compiler_opts, has_tensor_cores=int(arch[3:]) >= 80)
super().__init__(f"compile_ptx_{self.arch}")
def render(self, name:str, uops) -> str: return PTXRenderer(name, uops).replace("TARGET", self.arch).replace("VERSION", self.version)
def compile(self, src:str) -> bytes: return src.encode()
@@ -66,7 +67,7 @@ class CUDACompiler(Compiler):
compiler_opts = CompilerOptions("CUDA", global_max=[65535, 65535, 2147483647], local_max=[64, 1024, 1024], shared_max=49152)
def __init__(self, arch:str):
self.arch = arch
CUDACompiler.compiler_opts = CUDACompiler.compiler_opts._replace(has_tensor_cores=int(arch[3:]) >= 80)
CUDACompiler.compiler_opts = replace(CUDACompiler.compiler_opts, has_tensor_cores=int(arch[3:]) >= 80)
check(cuda.nvrtcVersion((nvrtcMajor := ctypes.c_int()), (nvrtcMinor := ctypes.c_int())))
self.compile_options = [f'--gpu-architecture={arch}', "-I/usr/local/cuda/include", "-I/usr/include", "-I/opt/cuda/include/"]
if (nvrtcMajor.value, nvrtcMinor.value) >= (12, 4): self.compile_options.append("--minimal")

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import os, ctypes, pathlib, re, fcntl, functools, mmap, struct, tempfile, hashlib, subprocess, time
from typing import Tuple, List, Any
from dataclasses import replace
from tinygrad.device import Compiled, LRUAllocator, Compiler, BufferOptions, CompilerOptions
from tinygrad.helpers import getenv, from_mv, init_c_struct_t, to_mv, round_up, to_char_p_p, DEBUG
from tinygrad.renderer.cstyle import CUDARenderer
@@ -69,7 +70,7 @@ class NVCompiler(Compiler):
compiler_opts = CompilerOptions("NV", global_max=[65535, 65535, 2147483647], local_max=[64, 1024, 1024], shared_max=49152)
def __init__(self, arch:str):
self.arch = arch
NVCompiler.compiler_opts = NVCompiler.compiler_opts._replace(has_tensor_cores=int(arch[3:]) >= 80)
NVCompiler.compiler_opts = replace(NVCompiler.compiler_opts, has_tensor_cores=int(arch[3:]) >= 80)
cuda_check(cuda.nvrtcVersion((nvrtcMajor := ctypes.c_int()), (nvrtcMinor := ctypes.c_int())))
self.compile_options = [f'--gpu-architecture={arch}', "-I/usr/local/cuda/include", "-I/usr/include", "-I/opt/cuda/include/"]
if (nvrtcMajor.value, nvrtcMinor.value) >= (12, 4): self.compile_options.append("--minimal")