add changeable DEBUG (#816)

This commit is contained in:
George Hotz
2023-05-27 13:28:25 -07:00
committed by GitHub
parent a3feee29c5
commit 1e56aced05
3 changed files with 10 additions and 70 deletions

View File

@@ -1,68 +0,0 @@
Getting the core instruction set correct is the value of tinygrad
The maximum tensor rank is 6-D, which is needed for pool2d.
Unary Ops
===
These are the simplest to reason about, and have pointwise mem access.
A and B are always the same size
Forward : A -> B
Backward (binary): (B', A) -> A'
Reduce Ops (with axis)
===
These take in an axis argument. B is smaller than A
Max and Sum are pretty different, do we really need Max?
Forward : A -> B
Backward : B' -> A'
Binary Ops (with broadcasting)
===
Pointwise mem access also.
Broadcasting adds complexity, aliased input.
Unbroadcasting for grad is a sum, but should be combined with the ternary op.
Forward : (A, B) -> C
Backward (ternary): (C', A, B) -> (A', B')
C.shape = max(A.shape, B.shape)
Movement Ops (2 or 1)
===
Reshape, Transpose, Slice
Depending on your Tensor implementation, these are free.
Reshape is almost always free.
Slice can be made free, but probably shouldn't be.
Transpose is hard to make free except in trivial cases.
Regardless, these are "reindexings" of existing arrays
Transpose and Slice are similar enough I think they can be merged.
They should use a DMA engine
Processing Ops (4)
===
Matmul is 1 matmul for forward, 2 for backward.
* It's actually three matmuls transposed
* cublasSgemm()
Conv2D is very complex. It seems to need three separate calls:
* cudnnConvolutionForward()
* cudnnConvolutionBackwardData()
* cudnnConvolutionBackwardFilter()
NOTE: Tensor Cores require that the tensors be in the NHWC data layout

View File

@@ -20,7 +20,14 @@ def mnum(i) -> str: return str(i) if i >= 0 else f"m{-i}"
@functools.lru_cache(maxsize=None, typed=True)
def getenv(key, default=0):
    """Return environment variable ``key`` converted to the type of ``default``.

    When the variable is unset, ``default`` itself is passed through the same
    conversion and returned. Results are memoized, so a change to the
    environment made after the first lookup of a given ``(key, default)``
    pair is not observed.

    ``typed=True`` keeps cache entries separate for defaults that compare
    equal but differ in type (``0``, ``0.0``, ``False``); without it a lookup
    with a float default could return the int cached by an earlier call.

    NOTE: the conversion is simply ``type(default)(value)``, so with a ``bool``
    default any non-empty string from the environment (including ``"0"``)
    converts to ``True``.
    """
    return type(default)(os.getenv(key, default))
# Read the DEBUG and IMAGE settings from the environment; both default to 0.
# NOTE(review): both names are assigned again a few lines below (DEBUG becomes a
# DebugSingleton there), so this looks like the pre-change line of the diff -- confirm.
DEBUG, IMAGE = getenv("DEBUG", 0), getenv("IMAGE", 0)
# NOTE: hack to allow DEBUG(x) to set the debug value at runtime
class DebugSingleton:
    """Mutable debug level that can be tested and ordered like an integer.

    The level is initialised from the ``DEBUG`` environment variable (0 when
    unset) and can be changed at runtime by calling the instance, e.g.
    ``DEBUG(3)``. Truthiness and the ordering operators (``>=``, ``>``,
    ``<=``, ``<``) are evaluated against the current level, so checks such as
    ``if DEBUG >= 4:`` always see the latest value.

    ``==`` is intentionally not overloaded (it stays an identity comparison),
    which keeps instances hashable; compare with the ordering operators or
    read ``.value`` directly instead.
    """

    def __init__(self):
        # Starting level comes from the environment.
        self.value = getenv("DEBUG", 0)

    def __call__(self, x):
        """Set the debug level to ``x``."""
        self.value = x

    def __bool__(self):
        """Return True when the level is non-zero."""
        return self.value != 0

    def __ge__(self, x):
        return self.value >= x

    def __gt__(self, x):
        return self.value > x

    def __le__(self, x):
        return self.value <= x

    def __lt__(self, x):
        return self.value < x

    def __repr__(self):
        # Show the level instead of the default object address.
        return f"{type(self).__name__}({self.value!r})"
# DEBUG is a DebugSingleton instance (call it to change the level; supports >= and truthiness);
# IMAGE is read from the environment, defaulting to 0.
DEBUG, IMAGE = DebugSingleton(), getenv("IMAGE", 0)
# **** tinygrad now supports dtypes! *****

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
from typing import Optional, Tuple, Union, List, Dict, Any, cast
import sys, weakref, importlib, inspect, functools, pathlib
from weakref import WeakValueDictionary
from tinygrad.helpers import prod, getenv, DType, dtypes, LazyNumpyArray, flatten, ImageDType
from tinygrad.helpers import prod, getenv, DType, dtypes, LazyNumpyArray, flatten, ImageDType, DEBUG
from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
from tinygrad.ops import Compiled, Interpreted, UnaryOps, BinaryOps, ReduceOps, MovementOps, LoadOps, OpType, LazyOp, get_lazyops, get_buffers, map_buffers
from tinygrad.runtime.lib import RawConst, RawBuffer
@@ -111,6 +111,7 @@ class LazyBuffer:
if prod(self.op.arg.shape) == 1 and hasattr(Device[self.device].codegen, 'supports_constant_folding'):
self.realized = RawConst(1, dtypes.from_np(self.op.arg.dtype), self.op.arg().flatten()[0])
else:
if DEBUG >= 4: print(f"copying {self.op.arg.shape}:{dtypes.from_np(self.op.arg.dtype)} -> {self.device}")
self.realized = Device[self.device].buffer.fromCPU(self.op.arg(), **self._device_extra_args())
elif self.op.op == LoadOps.CONTIGUOUS:
realized = self.op.src[0].realize().realized