mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
add changeable DEBUG (#816)
This commit is contained in:
68
docs/design
68
docs/design
@@ -1,68 +0,0 @@
|
||||
Getting the core instruction set correct is the value of tinygrad
|
||||
|
||||
The largest tensor needed is 6-D, for pool2d
|
||||
|
||||
Unary Ops
|
||||
===
|
||||
|
||||
These are the simplest to reason about, and have pointwise mem access.
|
||||
A and B are always the same size
|
||||
|
||||
Forward : A -> B
|
||||
Backward (binary): (B', A) -> A'
|
||||
|
||||
|
||||
|
||||
Reduce Ops (with axis)
|
||||
===
|
||||
|
||||
These take in an axis argument. B is smaller than A
|
||||
Max and Sum are pretty different, do we really need Max?
|
||||
|
||||
Forward : A -> B
|
||||
Backward : B' -> A'
|
||||
|
||||
|
||||
|
||||
Binary Ops (with broadcasting)
|
||||
===
|
||||
|
||||
Pointwise mem access also.
|
||||
Broadcasting adds complexity, aliased input.
|
||||
Unbroadcasting for grad is a sum, but should be combined with the ternary op.
|
||||
|
||||
Forward : (A, B) -> C
|
||||
Backward (ternary): (C', A, B) -> (A', B')
|
||||
|
||||
C.shape = max(A.shape, B.shape)
|
||||
|
||||
|
||||
|
||||
Movement Ops (2 or 1)
|
||||
===
|
||||
|
||||
Reshape, Transpose, Slice
|
||||
|
||||
Depending on your Tensor implementation, these are free.
|
||||
Reshape is almost always free.
|
||||
Slice can be made free, but probably shouldn't be.
|
||||
Transpose is hard to make free except in trivial cases.
|
||||
|
||||
Regardless, these are "reindexings" of existing arrays
|
||||
Transpose and Slice are similar enough I think they can be merged.
|
||||
They should use a DMA engine
|
||||
|
||||
|
||||
|
||||
Processing Ops (4)
|
||||
===
|
||||
|
||||
Matmul is 1 matmul for forward, 2 for backward.
|
||||
* It's actually three matmuls transposed
|
||||
* cublasSgemm()
|
||||
|
||||
Conv2D is very complex. It seems to need three.
|
||||
* cudnnConvolutionForward()
|
||||
* cudnnConvolutionBackwardData()
|
||||
* cudnnConvolutionBackwardFilter()
|
||||
NOTE: Tensor Cores require that the tensors be in the NHWC data layout
|
||||
@@ -20,7 +20,14 @@ def mnum(i) -> str: return str(i) if i >= 0 else f"m{-i}"
|
||||
# typed=True keeps cache entries for defaults that compare equal but differ in
# type (0, 0.0, False) separate; the return type depends on type(default), so
# sharing one entry would hand back a value of the wrong type.
@functools.lru_cache(maxsize=None, typed=True)
def getenv(key, default=0):
  """Return environment variable `key` coerced to the type of `default`.

  If the variable is unset, `default` itself is passed through its own type
  and returned. With `default=None` there is no type to coerce to, so the raw
  string (or None when unset) is returned instead of raising.

  Results are memoized per (key, default), so changes made to the environment
  after the first lookup of a given pair are not observed.
  """
  if default is None: return os.getenv(key)
  return type(default)(os.getenv(key, default))
|
||||
|
||||
DEBUG, IMAGE = getenv("DEBUG", 0), getenv("IMAGE", 0)
|
||||
# NOTE: hack to allow DEBUG(x) to set the debug value at runtime
|
||||
class DebugSingleton:
  """Mutable holder for the debug level.

  The level is read from the DEBUG environment variable (default 0) when the
  object is created and can be replaced later by calling the instance, e.g.
  `DEBUG(2)`. The instance can be used in truth tests and in ordering
  comparisons against a number (`DEBUG >= 4`, `DEBUG > 0`, ...).

  Equality is not overridden: `==` still compares object identity. Use
  `int(DEBUG) == x` to compare the level for equality.
  """
  def __init__(self): self.value = getenv("DEBUG", 0)
  # calling the instance overwrites the current level
  def __call__(self, x): self.value = x
  # truthy whenever the level is non-zero
  def __bool__(self): return self.value != 0
  # ordering comparisons delegate to the stored level
  def __ge__(self, x): return self.value >= x
  def __gt__(self, x): return self.value > x
  def __le__(self, x): return self.value <= x
  def __lt__(self, x): return self.value < x
  # expose the level for arithmetic, equality checks and printing
  def __int__(self): return int(self.value)
  def __repr__(self): return f"DebugSingleton({self.value!r})"
|
||||
|
||||
DEBUG, IMAGE = DebugSingleton(), getenv("IMAGE", 0)
|
||||
|
||||
# **** tinygrad now supports dtypes! *****
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
from typing import Optional, Tuple, Union, List, Dict, Any, cast
|
||||
import sys, weakref, importlib, inspect, functools, pathlib
|
||||
from weakref import WeakValueDictionary
|
||||
from tinygrad.helpers import prod, getenv, DType, dtypes, LazyNumpyArray, flatten, ImageDType
|
||||
from tinygrad.helpers import prod, getenv, DType, dtypes, LazyNumpyArray, flatten, ImageDType, DEBUG
|
||||
from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
|
||||
from tinygrad.ops import Compiled, Interpreted, UnaryOps, BinaryOps, ReduceOps, MovementOps, LoadOps, OpType, LazyOp, get_lazyops, get_buffers, map_buffers
|
||||
from tinygrad.runtime.lib import RawConst, RawBuffer
|
||||
@@ -111,6 +111,7 @@ class LazyBuffer:
|
||||
if prod(self.op.arg.shape) == 1 and hasattr(Device[self.device].codegen, 'supports_constant_folding'):
|
||||
self.realized = RawConst(1, dtypes.from_np(self.op.arg.dtype), self.op.arg().flatten()[0])
|
||||
else:
|
||||
if DEBUG >= 4: print(f"copying {self.op.arg.shape}:{dtypes.from_np(self.op.arg.dtype)} -> {self.device}")
|
||||
self.realized = Device[self.device].buffer.fromCPU(self.op.arg(), **self._device_extra_args())
|
||||
elif self.op.op == LoadOps.CONTIGUOUS:
|
||||
realized = self.op.src[0].realize().realized
|
||||
|
||||
Reference in New Issue
Block a user