Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-09 23:18:04 -05:00.
@@ -67,7 +67,7 @@ class Function:

 # %%
 # == LazyBuffer (in tinygrad/lazy.py, code 5/10) ==
-from tinygrad.helpers import DType
+from tinygrad.dtype import DType

 # this is where the properties live that you thought were a part of Tensor
 # LazyBuffer is like a Tensor without derivatives, at the mlop layer
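The same mechanical change repeats throughout this diff: the dtype machinery (`DType`, `dtypes`, `ImageDType`, `PtrDType`, and the promotion helpers) moves out of `tinygrad.helpers` into the new `tinygrad/dtype.py`, while `dtypes` stays re-exported from the top-level package. A minimal sketch of the import forms that work after this change (assuming a checkout at this commit):

```python
# both resolve to the same objects after this diff
from tinygrad import dtypes                # re-export kept in tinygrad/__init__.py
from tinygrad.dtype import DType, dtypes   # the new canonical home

assert isinstance(dtypes.int32, DType)
```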
@@ -37,7 +37,7 @@ print("******** second, the Device ***********")
 DEVICE = "CLANG" # NOTE: you can change this!

 import struct
-from tinygrad.helpers import dtypes
+from tinygrad.dtype import dtypes
 from tinygrad.device import Buffer, Device
 from tinygrad.ops import LazyOp, BufferOps, MemBuffer, BinaryOps
 from tinygrad.shape.shapetracker import ShapeTracker
@@ -55,7 +55,7 @@ There are even more of these factory methods, you can find them in the [tensor.p
 All the tensors creation methods can take a `dtype` argument to specify the data type of the tensor.

 ```python
-from tinygrad.helpers import dtypes
+from tinygrad.dtype import dtypes

 t3 = Tensor([1, 2, 3, 4, 5], dtype=dtypes.int32)
 ```
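Extending the docs snippet above, a small sketch of inspecting the resulting dtype; the printed reprs assume the alias table in the new dtype.py, where `int` is the last alias registered for the `int32` DType:

```python
from tinygrad import Tensor, dtypes

t3 = Tensor([1, 2, 3, 4, 5], dtype=dtypes.int32)
print(t3.dtype)           # dtypes.int (the int32 DType, shown under its `int` alias)
print(t3.dtype.itemsize)  # 4 bytes per element
```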
@@ -1,7 +1,6 @@
 from typing import Tuple
 import time
-from tinygrad import Tensor, TinyJit, nn, Variable
-from tinygrad.helpers import dtypes # TODO: wouldn't need this if argmax returned the right dtype
+from tinygrad import Tensor, TinyJit, nn, Variable, dtypes
 import gymnasium as gym
 from tqdm import trange
 import numpy as np # TODO: remove numpy import
@@ -16,8 +16,8 @@ from vits import Y_LENGTH_ESTIMATE_SCALARS, HParams, Synthesizer, TextMapper, ge
 from whisper import init_whisper, transcribe_waveform
 from sentencepiece import SentencePieceProcessor

-from tinygrad.helpers import Timing, dtypes, fetch
-from tinygrad.tensor import Tensor
+from tinygrad.helpers import Timing, fetch
+from tinygrad import Tensor, dtypes

 # Whisper constants
 RATE = 16000
@@ -1,7 +1,5 @@
 import numpy as np
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import dtypes
-from tinygrad import Device
+from tinygrad import Device, dtypes, Tensor

 # TODO: will be better when tinygrad does math in the target dtype, can remove the floor and use a mul
 def bit_extract(x, s, e) -> Tensor:
@@ -4,13 +4,13 @@ from tqdm import trange
 import numpy as np
 from tinygrad import Device, GlobalCounters
 from typing import Optional, Union
-from tinygrad.tensor import Tensor
+from tinygrad import Tensor, dtypes
 from tinygrad.nn import Embedding, Linear, LayerNorm
 from tinygrad.shape.symbolic import Variable
 from tinygrad.jit import TinyJit
 import tiktoken
 from tinygrad.nn.state import torch_load, load_state_dict, get_state_dict
-from tinygrad.helpers import Timing, DEBUG, getenv, fetch, colored, dtypes
+from tinygrad.helpers import Timing, DEBUG, getenv, fetch, colored

 MAX_CONTEXT = getenv("MAX_CONTEXT", 128)
 HALF = getenv("HALF")
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # setup for distributed
 from extra import dist
-from tinygrad.helpers import getenv, dtypes
+from tinygrad.helpers import getenv
 if __name__ == "__main__":
   if getenv("DIST"):
     dist.preinit()
@@ -14,7 +14,7 @@ import random, time
 import numpy as np
 from typing import Any, Dict, Optional, SupportsIndex
 from extra.datasets import fetch_cifar, cifar_mean, cifar_std
-from tinygrad import nn
+from tinygrad import nn, dtypes
 from tinygrad.nn.state import get_state_dict
 from tinygrad.nn import optim
 from tinygrad import Device, GlobalCounters
@@ -7,8 +7,8 @@ from pathlib import Path
 import sys, argparse, json
 import numpy as np
 np.set_printoptions(linewidth=200)
-from tinygrad.helpers import Timing, Profiling, getenv, DEBUG, dtypes, colored
-from tinygrad import Device, GlobalCounters
+from tinygrad.helpers import Timing, Profiling, getenv, DEBUG, colored
+from tinygrad import Device, GlobalCounters, dtypes
 from tinygrad.tensor import Tensor
 from tinygrad.nn.state import safe_load, torch_load, load_state_dict, get_parameters
 from extra.models.llama import Transformer, convert_from_huggingface
@@ -1,9 +1,9 @@
 import time
 from pathlib import Path
 import numpy as np
-from tinygrad import Tensor, GlobalCounters
+from tinygrad import Tensor, GlobalCounters, dtypes
 from tinygrad.jit import TinyJit
-from tinygrad.helpers import getenv, dtypes
+from tinygrad.helpers import getenv
 from examples.mlperf import helpers

 def eval_resnet():
@@ -4,9 +4,8 @@ import sys, logging, time, io, math, argparse, operator, numpy as np
 from functools import partial, reduce
 from pathlib import Path
 from typing import Tuple, Optional, Type
-from tinygrad import nn
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import dtypes, getenv
+from tinygrad import nn, dtypes, Tensor
+from tinygrad.helpers import getenv
 from tinygrad.nn.state import torch_load
 from examples.vits import ResidualCouplingBlock, PosteriorEncoder, Encoder, ResBlock1, ResBlock2, LRELU_SLOPE, sequence_mask, split, download_if_not_present, get_hparams_from_file, load_checkpoint, weight_norm, HParams
 from examples.sovits_helpers import preprocess
@@ -1,7 +1,6 @@
 import math
 from typing import Optional, Tuple
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import dtypes
+from tinygrad import Tensor, dtypes
 import librosa
 import soundfile
 import numpy as np
@@ -9,9 +9,8 @@ from collections import namedtuple
 from PIL import Image
 import numpy as np
 from tqdm import tqdm
-from tinygrad.tensor import Tensor
-from tinygrad import Device, GlobalCounters
-from tinygrad.helpers import dtypes, Timing, Context, getenv, fetch, colored
+from tinygrad import Device, GlobalCounters, dtypes, Tensor
+from tinygrad.helpers import Timing, Context, getenv, fetch, colored
 from tinygrad.nn import Conv2d, Linear, GroupNorm, LayerNorm, Embedding
 from tinygrad.nn.state import torch_load, load_state_dict, get_state_dict
 from tinygrad.jit import TinyJit
@@ -5,8 +5,8 @@ from phonemizer.punctuation import Punctuation
 from functools import reduce
 from pathlib import Path
 from typing import List
-from tinygrad import nn
-from tinygrad.helpers import dtypes, fetch
+from tinygrad import nn, dtypes
+from tinygrad.helpers import fetch
 from tinygrad.nn.state import torch_load
 from tinygrad.tensor import Tensor
 from tinygrad.jit import TinyJit
@@ -2,7 +2,7 @@
 import numpy as np
 import pickle
 from tinygrad.runtime.ops_gpu import CLProgram, CLBuffer
-from tinygrad.helpers import dtypes
+from tinygrad import dtypes
 from tqdm import trange, tqdm
 from matplotlib import pyplot as plt

@@ -1,7 +1,8 @@
 from typing import Tuple, List, NamedTuple, Any, Dict, Optional, Union, DefaultDict, cast
 from tinygrad.codegen.linearizer import UOps, MemOp, UOp
 from tinygrad.ops import BinaryOps, UnaryOps
-from tinygrad.helpers import DType, dtypes, DEBUG
+from tinygrad.dtype import DType, dtypes
+from tinygrad.helpers import DEBUG
 from tinygrad.shape.symbolic import Variable, NumNode, MulNode, DivNode, ModNode, LtNode, SumNode, AndNode
 import functools
 import math
@@ -1,9 +1,10 @@
 import struct
 from platform import system
 from typing import Tuple, Dict, List, Optional
+from tinygrad import dtypes
 from tinygrad.ops import BinaryOps, UnaryOps, TernaryOps
 from tinygrad.codegen.linearizer import UOps, UOp
-from tinygrad.helpers import dtypes, CI
+from tinygrad.helpers import CI
 from tinygrad.codegen.assembly import uops_to_asmstyle, AssemblyLanguage

 def float_to_hex(x): return "%02X%02X%02X%02X" % tuple(struct.pack("f",x)[::-1])
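The `float_to_hex` helper visible in the context above is unchanged by this diff; as a sanity check, it formats the big-endian IEEE-754 bytes of a 32-bit float:

```python
import struct

def float_to_hex(x): return "%02X%02X%02X%02X" % tuple(struct.pack("f",x)[::-1])

print(float_to_hex(1.0))   # 3F800000: sign 0, exponent 127, mantissa 0
print(float_to_hex(-2.0))  # C0000000: sign 1, exponent 128, mantissa 0
```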
@@ -2,7 +2,7 @@ from typing import List
 import struct
 from tinygrad.codegen.assembly import uops_to_asmstyle, AssemblyLanguage
 from tinygrad.codegen.linearizer import UOps, UOp
-from tinygrad.helpers import dtypes
+from tinygrad import dtypes
 from tinygrad.ops import BinaryOps, UnaryOps, TernaryOps
 from tinygrad.runtime.ops_cuda import arch

@@ -1,6 +1,6 @@
 import yaml
 from typing import Tuple, Set, Dict
-from tinygrad.helpers import dtypes
+from tinygrad import dtypes
 from tinygrad.codegen.assembly import AssemblyCodegen, Register
 from tinygrad.codegen.linearizer import UOps
 from tinygrad.ops import BinaryOps, UnaryOps, TernaryOps
@@ -1,7 +1,7 @@
 import os, gzip, tarfile, pickle
 import numpy as np
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import dtypes, fetch
+from tinygrad import Tensor, dtypes
+from tinygrad.helpers import fetch

 def fetch_mnist(tensors=False):
   parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
@@ -1,5 +1,4 @@
-from tinygrad.helpers import dtypes
-from tinygrad.tensor import Tensor
+from tinygrad import Tensor, dtypes
 from extra.datasets.imagenet import iterate, get_val_files

 if __name__ == "__main__":
@@ -1,5 +1,5 @@
 from typing import Tuple, Dict, List
-from tinygrad.helpers import DType
+from tinygrad.dtype import DType
 from tinygrad.tensor import Device, Tensor
 from tinygrad.jit import TinyJit
 from tinygrad.nn.state import get_state_dict
@@ -1,82 +0,0 @@
-old = """__kernel void re_S256_16_8( write_only image2d_t data0, read_only image2d_t data1, read_only image2d_t data2, __global float* data3 ) {
-  const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
-  int idx2 = get_global_id(0); /* 4 */
-  int idx1 = get_global_id(1); /* 16 */
-  int idx0 = get_global_id(2); /* 256 */
-  float acc0 = 0.0f;
-  for (int idx3 = 0; idx3 < 8; idx3++) {
-    float4 val1_0 = read_imagef(data1, smp, (int2)(((idx1*8)+idx3), 0)) /* (1, 128, 4) */;
-    float4 val2_0 = read_imagef(data2, smp, (int2)(((idx1*32)+(idx3*4)+idx2), idx0)) /* (256, 512, 4) */;
-    acc0+=(val1_0.x*val2_0.x);
-    acc0+=(val1_0.y*val2_0.y);
-    acc0+=(val1_0.z*val2_0.z);
-    acc0+=(val1_0.w*val2_0.w);
-  }
-  __local float temp[64];
-  temp[((idx1*4)+idx2)] = acc0;
-  barrier(CLK_LOCAL_MEM_FENCE);
-  if (((idx1*4)+idx2) == 0) {
-    float4 output0 = (float4)(0.0f,0.0f,0.0f,0.0f);
-    for (int mid = 0; mid < 16; mid++) {
-      float4 val5_0 = ((__local float4*)temp)[mid];
-      output0.x+=val5_0.x;
-      output0.y+=val5_0.y;
-      output0.z+=val5_0.z;
-      output0.w+=val5_0.w;
-    }
-    float4 val3_0 = ((__global float4*)data3)[idx0];
-    write_imagef(data0, (int2)(idx0, 0), (float4)(max((output0.x+val3_0.x),(0.0f)),max((output0.y+val3_0.y),(0.0f)),max((output0.z+val3_0.z),(0.0f)),max((output0.w+val3_0.w),(0.0f)))); /* (1, 256, 4) */
-  }
-}"""
-
-new = """__kernel void r_256_16_4_8_4(write_only image2d_t data0, read_only image2d_t data1, read_only image2d_t data2, const __global float* data3) {
-  const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
-  __attribute__ ((aligned (16))) __local float temp[64];
-  int gidx0 = get_group_id(0); /* 256 */
-  int lidx1 = get_local_id(1); /* 16 */
-  int lidx2 = get_local_id(0); /* 4 */
-  float acc0 = 0.0f;
-  for (int ridx0 = 0; ridx0 < 8; ++ridx0) {
-    float4 val0 = read_imagef(data1, smp, (int2)(((lidx1*8)+ridx0),0));
-    float4 val1 = read_imagef(data2, smp, (int2)(((lidx1*32)+lidx2+(ridx0*4)),gidx0));
-    acc0 = (((val0).x*(val1).x)+acc0);
-    acc0 = (((val0).y*(val1).y)+acc0);
-    acc0 = (((val0).z*(val1).z)+acc0);
-    acc0 = (((val0).w*(val1).w)+acc0);
-  }
-  temp[(lidx1*4)+lidx2] = acc0;
-  barrier(CLK_LOCAL_MEM_FENCE);
-  float4 acc1 = (float4)(0.0f,0.0f,0.0f,0.0f);
-  for (int ridx1 = 0; ridx1 < 16; ++ridx1) {
-    float4 val2 = (float4)(*((__local float4*)(temp+ridx1*4)));
-    (acc1).x = ((val2).x+(acc1).x);
-    (acc1).y = ((val2).y+(acc1).y);
-    (acc1).z = ((val2).z+(acc1).z);
-    (acc1).w = ((val2).w+(acc1).w);
-  }
-  float4 val3 = (float4)(*((__global float4*)(data3+gidx0*4)));
-  write_imagef(data0, (int2)(gidx0,0), (float4)(max(((acc1).x+(val3).x),0.0f),max(((acc1).y+(val3).y),0.0f),max(((acc1).z+(val3).z),0.0f),max(((acc1).w+(val3).w),0.0f)));
-}"""
-
-from tinygrad.runtime.ops_gpu import CLBuffer, CLProgram
-from tinygrad.helpers import dtypes, prod
-
-if __name__ == "__main__":
-  out = CLBuffer(prod((1, 128, 4)), dtypes.imageh((1,128,4)))
-  x = CLBuffer(prod((1, 128, 4)), dtypes.imageh((1,128,4)))
-  w = CLBuffer(prod((256, 512, 4)), dtypes.imageh((256, 512, 4)))
-  b = CLBuffer(1024, dtypes.float)
-
-  old = CLProgram("re_S256_16_8", old)
-  new = CLProgram("r_256_16_4_8_4", new)
-
-  old_tms = []
-  new_tms = []
-
-  for i in range(5):
-    old_tms.append(old([1,1,256], [4,16,1], out, x, w, b, wait=True))
-    new_tms.append(new([256,1,1], [4,16,1], out, x, w, b, wait=True))
-
-  print(f"old: {min(old_tms)*1e6:.2f} us new: {min(new_tms)*1e6:.2f} us")
-
-
@@ -1,6 +1,7 @@
 import time
 import numpy as np
-from tinygrad.helpers import dtypes, getenv, prod, flat_mv
+from tinygrad import dtypes
+from tinygrad.helpers import getenv, prod, flat_mv
 from tinygrad.runtime.ops_hip import HIPAllocator, HIPProgram, compile_hip

 # AMD_LOG_LEVEL=3 ./MIOpenDriver gemm --iter 1000 --time 1 --a_w 2048 --a_h 2048 --b_w 2048
@@ -2,8 +2,8 @@ import os
 os.environ["METAL"] = "1"
 import time
 import numpy as np
-from tinygrad.helpers import dtypes, getenv, flat_mv
-from tinygrad import Device
+from tinygrad import Device, dtypes
+from tinygrad.helpers import getenv, flat_mv
 from tinygrad.runtime.ops_metal import MetalAllocator, MetalDevice, MetalProgram, compile_metal

 N = getenv("N", 2048)
@@ -5,14 +5,14 @@ import time, torch, torch.mps

 from tinygrad.tensor import Tensor
 from tinygrad.jit import TinyJit
-from tinygrad import Device, GlobalCounters
+from tinygrad import Device, GlobalCounters, dtypes
 from tinygrad.helpers import colored, getenv, CI, flat_mv

 import os
 os.environ["METAL"] = "1"
 import time
 import numpy as np
-from tinygrad.helpers import dtypes, getenv
+from tinygrad.helpers import getenv
 from tinygrad.runtime.ops_metal import MetalAllocator, MetalDevice, MetalProgram, compile_metal

 N = 16384
@@ -1,7 +1,6 @@
 import numpy as np
 from tinygrad.helpers import getenv
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import dtypes
+from tinygrad import dtypes, Tensor
 dtype_in = dtypes.half if getenv("HALF") else dtypes.float
 N = getenv("N", 4096)
 CNT = getenv("CNT", 10)
@@ -3,9 +3,8 @@ import math
 import os
 import numpy as np
 from pathlib import Path
-from tinygrad import nn
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import dtypes, get_child, fetch
+from tinygrad import nn, Tensor, dtypes
+from tinygrad.helpers import get_child, fetch
 from tinygrad.nn.state import torch_load
 from extra.models.resnet import ResNet
 from extra.models.retinanet import nms as _box_nms
@@ -2,8 +2,8 @@ from __future__ import annotations
 from google.protobuf.internal.containers import RepeatedCompositeFieldContainer
 import importlib
 import numpy as np
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import getenv, DEBUG, dtypes
+from tinygrad import Tensor, dtypes
+from tinygrad.helpers import getenv, DEBUG
 from typing import List, Dict
 from onnx import AttributeProto, ModelProto, TensorProto, TypeProto # onnx 1.50 uses serialized file (see onnx/onnx-ml.proto) as descriptors
 try:
@@ -1,7 +1,8 @@
 import functools, io, math
 from typing import Union, Tuple, Optional, List, Any
-from tinygrad.tensor import Tensor
-from tinygrad.helpers import prod, dtypes, ImageDType, flatten
+from tinygrad import Tensor, dtypes
+from tinygrad.dtype import ImageDType
+from tinygrad.helpers import prod, flatten
 from extra.onnx import safe_numpy
 from onnx.helper import tensor_dtype_to_np_dtype
 from onnx import TensorProto
@@ -12,7 +12,7 @@ from tinygrad.helpers import getenv

 # stuff needed to unpack a kernel
 from tinygrad.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer
-from tinygrad.helpers import dtypes
+from tinygrad.dtype import dtypes
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
@@ -5,7 +5,7 @@ import numpy as np

 # stuff needed to unpack a kernel
 from tinygrad.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer
-from tinygrad.helpers import dtypes
+from tinygrad.dtype import dtypes
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
@@ -1,6 +1,6 @@
 # stuff needed to unpack a kernel
 from tinygrad.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer
-from tinygrad.helpers import dtypes
+from tinygrad.dtype import dtypes
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
@@ -9,7 +9,7 @@ from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_lo

 # stuff needed to unpack a kernel
 from tinygrad.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer
-from tinygrad.helpers import dtypes
+from tinygrad.dtype import dtypes
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
@@ -13,8 +13,9 @@ import onnx
 from tqdm import tqdm
 from typing import Tuple, List, Optional, Dict
 from extra.onnx import get_run_onnx
-from tinygrad import Tensor, Device, GlobalCounters
-from tinygrad.helpers import dtypes, partition, Context, fetch, getenv, ImageDType, GRAPH, DEBUG
+from tinygrad import Tensor, Device, GlobalCounters, dtypes
+from tinygrad.dtype import ImageDType
+from tinygrad.helpers import partition, Context, fetch, getenv, GRAPH, DEBUG
 from tinygrad.realize import run_schedule, lower_schedule_item
 from tinygrad.ops import LoadOps, ScheduleItem
 Device.DEFAULT = "GPU"
test/external/external_osx_profiling.py
@@ -1,5 +1,5 @@
 from tinygrad.runtime.ops_gpu import CLProgram, CL, CLBuffer
-from tinygrad.helpers import dtypes
+from tinygrad import dtypes
 import time

 N = 1000000
test/external/external_test_jit_on_models.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 import unittest
 import numpy as np
-from tinygrad.tensor import Tensor
+from tinygrad import Tensor, dtypes
 from tinygrad.jit import TinyJit
-from tinygrad.helpers import dtypes, CI
+from tinygrad.helpers import CI
 from test.helpers import derandomize_model

 from examples.llama import Transformer
@@ -4,8 +4,8 @@ from tinygrad.tensor import Tensor
 from tinygrad.nn import optim
 from tinygrad.nn.state import get_parameters
 from tinygrad.jit import TinyJit
-from tinygrad import Device, GlobalCounters
-from tinygrad.helpers import CI, dtypes
+from tinygrad import Device, GlobalCounters, dtypes
+from tinygrad.helpers import CI
 from tinygrad.shape.symbolic import Variable
 from test.helpers import derandomize_model

@@ -2,8 +2,7 @@
 import unittest
 import numpy as np
 from tinygrad.tensor import Tensor
-from tinygrad import Device
-from tinygrad.helpers import dtypes
+from tinygrad import Device, dtypes

 N = 200 # has to be bigger than the cache to fail

@@ -4,7 +4,8 @@
 import unittest
 import numpy as np
 from typing import Optional, Tuple
-from tinygrad.helpers import prod, dtypes
+from tinygrad.helpers import prod
+from tinygrad.dtype import dtypes

 # *** first, we implement the atan2 op at the lowest level ***
 # `atan2_gpu` for GPUBuffers and `atan2_cpu` for CPUBuffers
@@ -2,7 +2,8 @@ import unittest
 import numpy as np
 import torch
 import operator
-from tinygrad.helpers import CI, DTYPES_DICT, getenv, DType, DEBUG, ImageDType, PtrDType, OSX, least_upper_float, temp, least_upper_dtype
+from tinygrad.helpers import CI, getenv, DEBUG, OSX, temp
+from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype
 from tinygrad import Device
 from tinygrad.tensor import Tensor, dtypes
 from typing import Any, List
@@ -4,7 +4,8 @@ from tinygrad import Tensor, dtypes, Device
 import operator
 import numpy as np
 from hypothesis import given, strategies as st, settings
-from tinygrad.helpers import CI, getenv, DType, OSX
+from tinygrad.dtype import DType
+from tinygrad.helpers import CI, getenv, OSX
 from tinygrad.ops import UnaryOps, get_lazyop_info

 settings.register_profile("my_profile", max_examples=200, deadline=None)
@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 from tinygrad import Device, dtypes, Tensor, Variable
-from tinygrad.helpers import ImageDType
+from tinygrad.dtype import ImageDType
 from tinygrad.features.image import to_image_idx

 @unittest.skipIf(Device.DEFAULT != "GPU", "only images on GPU")
@@ -5,7 +5,7 @@ from tinygrad.tensor import Tensor
 # ruff: noqa: F401
 from tinygrad.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer
 from tinygrad.lazy import LazyBuffer
-from tinygrad.helpers import dtypes
+from tinygrad import dtypes
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
@@ -12,7 +12,8 @@ from tinygrad.shape.symbolic import MulNode, SumNode, Variable, NumNode, Node, c
 from tinygrad.tensor import Tensor
 from tinygrad.jit import CacheCollector
 from tinygrad.realize import run_schedule
-from tinygrad.helpers import dtypes, prod
+from tinygrad.helpers import prod
+from tinygrad.dtype import dtypes

 @unittest.skipIf(not isinstance(Device[Device.DEFAULT], Compiled), "linearizer is only for compiled backends")
 class TestLinearizer(unittest.TestCase):
@@ -2,13 +2,12 @@
 import unittest
 from tinygrad.codegen.linearizer import Linearizer
 from tinygrad.features.search import Opt, OptOps
-from tinygrad import Device
+from tinygrad import Device, dtypes
 from tinygrad.helpers import OSX, CI
 from test.external.fuzz_linearizer import run_linearizer

 # stuff needed to unpack a kernel
 from tinygrad.ops import LazyOp, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer, get_lazyop_info
-from tinygrad.helpers import dtypes
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 inf, nan = float('inf'), float('nan')
@@ -5,8 +5,8 @@ import math
 import numpy as np
 import unittest
 from tinygrad.tensor import Tensor
-from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, dtypes
-from tinygrad import Device
+from tinygrad.helpers import getenv, IMAGE, DEBUG, CI
+from tinygrad import Device, dtypes

 if CI:
   import warnings
@@ -3,9 +3,7 @@ import math
 import unittest
 import numpy as np
 import torch
-from tinygrad.tensor import Tensor
-import tinygrad.nn as nn
-from tinygrad.helpers import dtypes
+from tinygrad import nn, dtypes, Tensor
 from functools import partial

 # https://gist.github.com/devries/11405101
@@ -7,10 +7,10 @@ from typing import List, Optional
 from tinygrad.tensor import Tensor
 from tinygrad.ops import LoadOps
 from tinygrad.device import Device, Compiled
-from tinygrad.helpers import DEBUG, dtypes
+from tinygrad.helpers import DEBUG
 from tinygrad.codegen.linearizer import Linearizer
 from tinygrad.graph import print_tree, realized_lazybuffer
-from tinygrad import nn
+from tinygrad import nn, dtypes

 def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_loadops=True):
   seen = set()
@@ -1,7 +1,7 @@
 import unittest
 from tinygrad.tensor import Tensor
-from tinygrad.helpers import CI, dtypes
-from tinygrad import Device
+from tinygrad.helpers import CI
+from tinygrad import Device, dtypes
 # similar to test/external/external_test_gpu_ast.py, but universal

 @unittest.skipIf(Device.DEFAULT == "CUDA" and CI, "slow on CUDA CI")
@@ -2,8 +2,8 @@ import numpy as np
 import torch
 import unittest, copy
 import mmap
-from tinygrad.tensor import Tensor, Device
-from tinygrad.helpers import dtypes, temp
+from tinygrad import Tensor, Device, dtypes
+from tinygrad.helpers import temp
 from extra.gradcheck import numerical_jacobian, jacobian, gradcheck

 x_init = np.random.randn(1,3).astype(np.float32)
@@ -2,7 +2,8 @@
 from typing import Optional, Tuple, Any, List
 import unittest, math
 import numpy as np
-from tinygrad.helpers import dtypes, getenv, DType, PtrDType
+from tinygrad.dtype import dtypes, DType, PtrDType
+from tinygrad.helpers import getenv
 from tinygrad.device import Buffer, Device
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps
 from tinygrad.device import CompiledASTRunner, Compiled
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 import unittest
+from tinygrad import dtypes
 from tinygrad.ops import LazyOp, BinaryOps, ReduceOps, get_lazyop_info, BufferOps, MemBuffer
 from tinygrad.shape.shapetracker import ShapeTracker
-from tinygrad.helpers import dtypes

 class TestFlopCounter(unittest.TestCase):
   def setUp(self):
@@ -1,7 +1,8 @@
 import unittest
 import numpy as np
 from PIL import Image
-from tinygrad.helpers import Context, ContextVar, DType, dtypes, merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten
+from tinygrad.dtype import DType, dtypes
+from tinygrad.helpers import Context, ContextVar, merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten
 from tinygrad.shape.symbolic import Variable, NumNode

 VARIABLE = ContextVar("VARIABLE", 0)
@@ -1,6 +1,6 @@
 from tinygrad.tensor import Tensor # noqa: F401
 from tinygrad.jit import TinyJit # noqa: F401
 from tinygrad.shape.symbolic import Variable # noqa: F401
-from tinygrad.helpers import dtypes # noqa: F401
+from tinygrad.dtype import dtypes # noqa: F401
 from tinygrad.ops import GlobalCounters # noqa: F401
 from tinygrad.device import Device # noqa: F401
@@ -3,7 +3,8 @@ import os, math, itertools
 from typing import NamedTuple, Optional, List, Tuple, cast, Dict, Union
 from tinygrad.ops import LazyOp, FlopCounter, get_lazyop_info, UnaryOps, BinaryOps, ReduceOps, MemBuffer, ConstBuffer, BufferOps, vars_from_ast
 from tinygrad.device import Device, Compiled
-from tinygrad.helpers import dedup, dtypes, colored, ImageDType, DType, ansilen, getenv, prod, DEBUG, round_up
+from tinygrad.dtype import dtypes, ImageDType, DType
+from tinygrad.helpers import dedup, colored, ansilen, getenv, prod, DEBUG, round_up
 from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
 from tinygrad.shape.symbolic import sint
 from tinygrad.shape.view import View, strides_for_shape
@@ -5,7 +5,8 @@ from collections import defaultdict
 from enum import Enum, auto
 from dataclasses import dataclass

-from tinygrad.helpers import colored, ImageDType, DEBUG, dtypes, DType, prod, PtrDType, getenv, all_same, to_function_name, flatten
+from tinygrad.dtype import ImageDType, dtypes, DType, PtrDType
+from tinygrad.helpers import colored, DEBUG, prod, getenv, all_same, to_function_name, flatten
 from tinygrad.ops import LazyOp, UnaryOps, BinaryOps, TernaryOps, ReduceOps, ConstBuffer, MemBuffer, BufferOps, vars_from_ast, get_lazyop_info
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.symbolic import Variable, NumNode, VariableOrNum, Node, SumNode, MulNode, DivNode, ModNode, LtNode, AndNode
@@ -3,8 +3,8 @@ import numpy as np
 from collections import defaultdict
 from typing import TYPE_CHECKING, Union, Any, List, Optional, Dict, Callable
 import importlib, inspect, functools, pathlib, time, re, ctypes
-from tinygrad.helpers import DType, dtypes, ImageDType, diskcache_get, diskcache_put
-from tinygrad.helpers import ansilen, DEBUG, getenv, colored, BEAM, NOOPT, all_int, to_function_name, from_mv, flat_mv
+from tinygrad.dtype import DType, dtypes, ImageDType
+from tinygrad.helpers import ansilen, DEBUG, getenv, colored, BEAM, NOOPT, all_int, to_function_name, from_mv, flat_mv, diskcache_get, diskcache_put
 from tinygrad.shape.symbolic import Variable, sym_infer, sint
 from tinygrad.ops import LazyOp, TernaryOps, get_lazyop_info, ReduceOps, BufferOps, BinaryOps, UnaryOps, Op, vars_from_ast, GlobalCounters

tinygrad/dtype.py (new file)
@@ -0,0 +1,99 @@
+from typing import NamedTuple, Final, Optional, ClassVar, Set, Tuple, Dict
+import numpy as np # TODO: remove numpy
+import functools
+
+# TODO: migrate this from NamedTuple -> dataclass
+class DType(NamedTuple):
+  priority: int # this determines when things get upcasted
+  itemsize: int
+  name: str
+  np: Optional[type] # TODO: someday this will be removed with the "remove numpy" project
+  sz: int = 1
+  def __repr__(self): return f"dtypes.{INVERSE_DTYPES_DICT[self]}" if self.sz == 1 else f"dtypes._{INVERSE_DTYPES_DICT[self.scalar()]}{self.sz}"
+  def vec(self, sz:int):
+    assert sz > 1 and self.sz == 1, f"can't vectorize {self} with size {sz}"
+    return DType(self.priority, self.itemsize*sz, f"{INVERSE_DTYPES_DICT[self]}{str(sz)}", None, sz)
+  def scalar(self): return DTYPES_DICT[self.name[:-len(str(self.sz))]] if self.sz > 1 else self
+
+# dependent typing?
+class ImageDType(DType):
+  def __new__(cls, priority, itemsize, name, np, shape, base):
+    return super().__new__(cls, priority, itemsize, name, np)
+  def __init__(self, priority, itemsize, name, np, shape, base):
+    self.shape: Tuple[int, ...] = shape # arbitrary arg for the dtype, used in image for the shape
+    self.base: DType = base
+    super().__init__()
+  def scalar(self): return self.base
+  def vec(self, sz:int): return self.base.vec(sz)
+  def __repr__(self): return f"dtypes.{self.name}({self.shape})"
+  # TODO: fix this to not need these
+  def __hash__(self): return hash((super().__hash__(), self.shape))
+  def __eq__(self, x): return super().__eq__(x) and self.shape == x.shape
+  def __ne__(self, x): return super().__ne__(x) or self.shape != x.shape
+
+class PtrDType(DType):
+  def __new__(cls, dt:DType): return super().__new__(cls, dt.priority, dt.itemsize, dt.name, dt.np, dt.sz)
+  def __repr__(self): return f"ptr.{super().__repr__()}"
+
+class dtypes:
+  @staticmethod
+  def is_float(x: DType) -> bool: return x.scalar() in (dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64)
+  @staticmethod # static methds on top, or bool in the type info will refer to dtypes.bool
+  def is_int(x: DType) -> bool: return x.scalar() in (dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64) or dtypes.is_unsigned(x)
+  @staticmethod
+  def is_unsigned(x: DType) -> bool: return x.scalar() in (dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64)
+  @staticmethod
+  def from_np(x) -> DType: return DTYPES_DICT[np.dtype(x).name]
+  @staticmethod # NOTE: isinstance(True, int) is True in python
+  def from_py(x) -> DType: return dtypes.default_float if isinstance(x, float) else dtypes.bool if isinstance(x, bool) else dtypes.default_int
+  @staticmethod
+  def fields() -> Dict[str, DType]: return DTYPES_DICT
+  bool: Final[DType] = DType(0, 1, "bool", np.bool_)
+  int8: Final[DType] = DType(1, 1, "char", np.int8)
+  uint8: Final[DType] = DType(2, 1, "unsigned char", np.uint8)
+  int16: Final[DType] = DType(3, 2, "short", np.int16)
+  uint16: Final[DType] = DType(4, 2, "unsigned short", np.uint16)
+  int32: Final[DType] = DType(5, 4, "int", np.int32)
+  uint32: Final[DType] = DType(6, 4, "unsigned int", np.uint32)
+  int64: Final[DType] = DType(7, 8, "long", np.int64)
+  uint64: Final[DType] = DType(8, 8, "unsigned long", np.uint64)
+  float16: Final[DType] = DType(9, 2, "half", np.float16)
+  # bfloat16 has higher priority than float16, so least_upper_dtype(dtypes.int64, dtypes.uint64) = dtypes.float16
+  bfloat16: Final[DType] = DType(10, 2, "__bf16", None)
+  float32: Final[DType] = DType(11, 4, "float", np.float32)
+  float64: Final[DType] = DType(12, 8, "double", np.float64)
+
+  # dtype aliases
+  half = float16; float = float32; double = float64 # noqa: E702
+  uchar = uint8; ushort = uint16; uint = uint32; ulong = uint64 # noqa: E702
+  char = int8; short = int16; int = int32; long = int64 # noqa: E702
+
+  # NOTE: these are image dtypes
+  @staticmethod
+  def imageh(shp): return ImageDType(100, 2, "imageh", np.float16, shp, dtypes.float32)
+  @staticmethod
+  def imagef(shp): return ImageDType(100, 4, "imagef", np.float32, shp, dtypes.float32)
+
+  default_float: ClassVar[DType] = float32
+  default_int: ClassVar[DType] = int32

+# https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html
+# we don't support weak type and complex type
+promo_lattice = { dtypes.bool: [dtypes.int8, dtypes.uint8],
+  dtypes.int8: [dtypes.int16], dtypes.int16: [dtypes.int32], dtypes.int32: [dtypes.int64], dtypes.int64: [dtypes.float16, dtypes.bfloat16],
+  dtypes.uint8: [dtypes.int16, dtypes.uint16], dtypes.uint16: [dtypes.int32, dtypes.uint32],
+  dtypes.uint32: [dtypes.int64, dtypes.uint64], dtypes.uint64: [dtypes.float16, dtypes.bfloat16],
+  dtypes.float16: [dtypes.float32], dtypes.bfloat16: [dtypes.float32], dtypes.float32: [dtypes.float64], }
+
+@functools.lru_cache(None)
+def _get_recursive_parents(dtype:DType) -> Set[DType]:
+  return set.union(*[_get_recursive_parents(d) for d in promo_lattice[dtype]], {dtype}) if dtype != dtypes.float64 else {dtypes.float64}
+@functools.lru_cache(None)
+def least_upper_dtype(*ds:DType) -> DType:
+  return min(set.intersection(*[_get_recursive_parents(d) for d in ds])) if not (images:=[d for d in ds if isinstance(d, ImageDType)]) else images[0]
+def least_upper_float(dt:DType) -> DType: return dt if dtypes.is_float(dt) else least_upper_dtype(dt, dtypes.float32)
+
+# HACK: staticmethods are not callable in 3.8 so we have to compare the class
+DTYPES_DICT = {k: v for k, v in dtypes.__dict__.items() if (
+  not k.startswith('__') and not k.startswith('default') and not callable(v) and v.__class__ is not staticmethod)}
+INVERSE_DTYPES_DICT = {v:k for k,v in DTYPES_DICT.items()}
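The promotion rules in the new file encode the JAX-style lattice linked in the comment. A worked example of walking it with `least_upper_dtype` (the reprs shown assume the alias table above, since `INVERSE_DTYPES_DICT` keeps the last-registered name for each DType):

```python
from tinygrad.dtype import dtypes, least_upper_dtype, least_upper_float

# int8 and uint8 first meet at int16, the lowest-priority common parent
print(least_upper_dtype(dtypes.int8, dtypes.uint8))     # dtypes.short (int16)
# int64's only parents in the lattice are float types, so this stays float16
print(least_upper_dtype(dtypes.int64, dtypes.float16))  # dtypes.half (float16)
# least_upper_float lifts non-float dtypes to at least float32
print(least_upper_float(dtypes.int32))                  # dtypes.float (float32)
```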
@@ -1,5 +1,6 @@
 from typing import Tuple
-from tinygrad.helpers import prod, IMAGE, getenv, dtypes, DEBUG
+from tinygrad.helpers import prod, IMAGE, getenv, DEBUG
+from tinygrad.dtype import dtypes

 # *** image Tensor function replacements ***

@@ -2,7 +2,8 @@ from typing import Dict, List, cast, DefaultDict, Optional, Tuple, Callable
 import itertools, random, math, time, multiprocessing, traceback, signal
 from tinygrad.device import Device, Compiled, Buffer
 from tinygrad.ops import MemBuffer, vars_from_ast
-from tinygrad.helpers import prod, ImageDType, flatten, DEBUG, CACHELEVEL, diskcache_get, diskcache_put, getenv, Context, colored, to_function_name
+from tinygrad.helpers import prod, flatten, DEBUG, CACHELEVEL, diskcache_get, diskcache_put, getenv, Context, colored, to_function_name
+from tinygrad.dtype import ImageDType
 from tinygrad.codegen.linearizer import Linearizer
 from tinygrad.shape.symbolic import sym_infer
 from collections import defaultdict
@@ -1,9 +1,8 @@
 from __future__ import annotations
 import os, functools, platform, time, re, contextlib, operator, hashlib, pickle, sqlite3, cProfile, pstats, tempfile, pathlib, string, ctypes
-import numpy as np
-from urllib import request
+from urllib import request # NOTE: this has to be imported specifically
 from tqdm import tqdm
-from typing import Dict, Tuple, Union, List, NamedTuple, Final, ClassVar, Optional, Iterable, Any, TypeVar, TYPE_CHECKING, Callable, Set, Sequence
+from typing import Dict, Tuple, Union, List, ClassVar, Optional, Iterable, Any, TypeVar, TYPE_CHECKING, Callable, Sequence
 if TYPE_CHECKING: # TODO: remove this and import TypeGuard from typing once minimum python supported version is 3.10
   from typing_extensions import TypeGuard

@@ -101,104 +100,6 @@ class Profiling(contextlib.ContextDecorator):
     self.pr.disable()
     pstats.Stats(self.pr).strip_dirs().sort_stats(self.sort).print_stats(self.frac)
 
-# **** tinygrad now supports dtypes! *****
-
-# TODO: migrate this from NamedTuple -> dataclass
-class DType(NamedTuple):
-  priority: int  # this determines when things get upcasted
-  itemsize: int
-  name: str
-  np: Optional[type]  # TODO: someday this will be removed with the "remove numpy" project
-  sz: int = 1
-  def __repr__(self): return f"dtypes.{INVERSE_DTYPES_DICT[self]}" if self.sz == 1 else f"dtypes._{INVERSE_DTYPES_DICT[self.scalar()]}{self.sz}"
-  def vec(self, sz:int):
-    assert sz > 1 and self.sz == 1, f"can't vectorize {self} with size {sz}"
-    return DType(self.priority, self.itemsize*sz, f"{INVERSE_DTYPES_DICT[self]}{str(sz)}", None, sz)
-  def scalar(self): return DTYPES_DICT[self.name[:-len(str(self.sz))]] if self.sz > 1 else self
-
-# dependent typing?
-class ImageDType(DType):
-  def __new__(cls, priority, itemsize, name, np, shape, base):
-    return super().__new__(cls, priority, itemsize, name, np)
-  def __init__(self, priority, itemsize, name, np, shape, base):
-    self.shape: Tuple[int, ...] = shape  # arbitrary arg for the dtype, used in image for the shape
-    self.base: DType = base
-    super().__init__()
-  def scalar(self): return self.base
-  def vec(self, sz:int): return self.base.vec(sz)
-  def __repr__(self): return f"dtypes.{self.name}({self.shape})"
-  # TODO: fix this to not need these
-  def __hash__(self): return hash((super().__hash__(), self.shape))
-  def __eq__(self, x): return super().__eq__(x) and self.shape == x.shape
-  def __ne__(self, x): return super().__ne__(x) or self.shape != x.shape
-
-class PtrDType(DType):
-  def __new__(cls, dt:DType): return super().__new__(cls, dt.priority, dt.itemsize, dt.name, dt.np, dt.sz)
-  def __repr__(self): return f"ptr.{super().__repr__()}"
-
-class dtypes:
-  @staticmethod
-  def is_float(x: DType) -> bool: return x.scalar() in (dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64)
-  @staticmethod # static methds on top, or bool in the type info will refer to dtypes.bool
-  def is_int(x: DType) -> bool: return x.scalar() in (dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64) or dtypes.is_unsigned(x)
-  @staticmethod
-  def is_unsigned(x: DType) -> bool: return x.scalar() in (dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64)
-  @staticmethod
-  def from_np(x) -> DType: return DTYPES_DICT[np.dtype(x).name]
-  @staticmethod # NOTE: isinstance(True, int) is True in python
-  def from_py(x) -> DType: return dtypes.default_float if isinstance(x, float) else dtypes.bool if isinstance(x, bool) else dtypes.default_int
-  @staticmethod
-  def fields() -> Dict[str, DType]: return DTYPES_DICT
-  bool: Final[DType] = DType(0, 1, "bool", np.bool_)
-  int8: Final[DType] = DType(1, 1, "char", np.int8)
-  uint8: Final[DType] = DType(2, 1, "unsigned char", np.uint8)
-  int16: Final[DType] = DType(3, 2, "short", np.int16)
-  uint16: Final[DType] = DType(4, 2, "unsigned short", np.uint16)
-  int32: Final[DType] = DType(5, 4, "int", np.int32)
-  uint32: Final[DType] = DType(6, 4, "unsigned int", np.uint32)
-  int64: Final[DType] = DType(7, 8, "long", np.int64)
-  uint64: Final[DType] = DType(8, 8, "unsigned long", np.uint64)
-  float16: Final[DType] = DType(9, 2, "half", np.float16)
-  # bfloat16 has higher priority than float16, so least_upper_dtype(dtypes.int64, dtypes.uint64) = dtypes.float16
-  bfloat16: Final[DType] = DType(10, 2, "__bf16", None)
-  float32: Final[DType] = DType(11, 4, "float", np.float32)
-  float64: Final[DType] = DType(12, 8, "double", np.float64)
-
-  # dtype aliases
-  half = float16; float = float32; double = float64 # noqa: E702
-  uchar = uint8; ushort = uint16; uint = uint32; ulong = uint64 # noqa: E702
-  char = int8; short = int16; int = int32; long = int64 # noqa: E702
-
-  # NOTE: these are image dtypes
-  @staticmethod
-  def imageh(shp): return ImageDType(100, 2, "imageh", np.float16, shp, dtypes.float32)
-  @staticmethod
-  def imagef(shp): return ImageDType(100, 4, "imagef", np.float32, shp, dtypes.float32)
-
-  default_float: ClassVar[DType] = float32
-  default_int: ClassVar[DType] = int32
-
-# https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html
-# we don't support weak type and complex type
-promo_lattice = { dtypes.bool: [dtypes.int8, dtypes.uint8],
-  dtypes.int8: [dtypes.int16], dtypes.int16: [dtypes.int32], dtypes.int32: [dtypes.int64], dtypes.int64: [dtypes.float16, dtypes.bfloat16],
-  dtypes.uint8: [dtypes.int16, dtypes.uint16], dtypes.uint16: [dtypes.int32, dtypes.uint32],
-  dtypes.uint32: [dtypes.int64, dtypes.uint64], dtypes.uint64: [dtypes.float16, dtypes.bfloat16],
-  dtypes.float16: [dtypes.float32], dtypes.bfloat16: [dtypes.float32], dtypes.float32: [dtypes.float64], }
-
-@functools.lru_cache(None)
-def _get_recursive_parents(dtype:DType) -> Set[DType]:
-  return set.union(*[_get_recursive_parents(d) for d in promo_lattice[dtype]], {dtype}) if dtype != dtypes.float64 else {dtypes.float64}
-@functools.lru_cache(None)
-def least_upper_dtype(*ds:DType) -> DType:
-  return min(set.intersection(*[_get_recursive_parents(d) for d in ds])) if not (images:=[d for d in ds if isinstance(d, ImageDType)]) else images[0]
-def least_upper_float(dt:DType) -> DType: return dt if dtypes.is_float(dt) else least_upper_dtype(dt, dtypes.float32)
-
-# HACK: staticmethods are not callable in 3.8 so we have to compare the class
-DTYPES_DICT = {k: v for k, v in dtypes.__dict__.items() if (
-  not k.startswith('__') and not k.startswith('default') and not callable(v) and v.__class__ is not staticmethod)}
-INVERSE_DTYPES_DICT = {v:k for k,v in DTYPES_DICT.items()}
-
 # *** universal database cache ***
 
 _cache_dir: str = getenv("XDG_CACHE_HOME", os.path.expanduser("~/Library/Caches" if OSX else "~/.cache"))
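The roughly 100 lines deleted above are re-added nearly verbatim in the new tinygrad/dtype.py, so behavior is unchanged; only the import site moves. A short sketch of the relocated machinery, with assertions that are illustrative rather than part of the diff:

```python
from tinygrad.dtype import dtypes, DTYPES_DICT, INVERSE_DTYPES_DICT

# the lookup tables still round-trip dtype names
assert DTYPES_DICT["float32"] is dtypes.float32
assert INVERSE_DTYPES_DICT[dtypes.bool] == "bool"

# vec()/scalar() remain inverses for vectorized dtypes
v4 = dtypes.float32.vec(4)
assert v4.sz == 4 and v4.itemsize == 4 * dtypes.float32.itemsize
assert v4.scalar() is dtypes.float32

# image dtypes keep their shape and decay to their base dtype
img = dtypes.imageh((8, 8, 4))
assert img.shape == (8, 8, 4) and img.base is dtypes.float32
```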
@@ -1,7 +1,8 @@
 from __future__ import annotations
 from typing import Callable, List, Tuple, Dict, cast, Union, Optional, TypeVar, Generic
 import functools, itertools, operator
-from tinygrad.helpers import DEBUG, DType, merge_dicts, getenv, all_int, Context, GRAPH
+from tinygrad.dtype import DType
+from tinygrad.helpers import DEBUG, merge_dicts, getenv, all_int, Context, GRAPH
 from tinygrad.device import Device, JITRunner, CompiledASTRunner, Buffer
 from tinygrad.tensor import Tensor
 from tinygrad.shape.shapetracker import ShapeTracker
@@ -2,7 +2,8 @@ from __future__ import annotations
 import sys, math
 import numpy as np
 from typing import Union, Optional, Any, Tuple, List, Set, Dict
-from tinygrad.helpers import prod, dtypes, DType, merge_dicts, flatten, getenv, dedup, ImageDType, DEBUG, all_int, all_same
+from tinygrad.dtype import dtypes, DType, ImageDType
+from tinygrad.helpers import prod, merge_dicts, flatten, getenv, dedup, DEBUG, all_int, all_same
 from tinygrad.ops import LoadOps, UnaryOps, BinaryOps, TernaryOps, ReduceOps, BufferOps
 from tinygrad.ops import Op, LazyOp, ConstBuffer, MemBuffer, ScheduleItem, vars_from_ast
 from tinygrad.shape.symbolic import sint, Variable
@@ -1,6 +1,7 @@
 import math
 from typing import Tuple, Optional
-from tinygrad.helpers import argsort, DType
+from tinygrad.helpers import argsort
+from tinygrad.dtype import DType
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps
 from tinygrad.tensor import Function
 from tinygrad.lazy import LazyBuffer
@@ -3,7 +3,8 @@ from tqdm import tqdm
 from typing import Dict, Union, List, Optional, Any, Tuple
 from tinygrad.tensor import Tensor
 from tinygrad.ops import GlobalCounters
-from tinygrad.helpers import dtypes, prod, argsort, DEBUG, Timing, CI, unwrap
+from tinygrad.dtype import dtypes
+from tinygrad.helpers import prod, argsort, DEBUG, Timing, CI, unwrap
 from tinygrad.shape.view import strides_for_shape
 
 safe_dtypes = {"F16": dtypes.float16, "F32": dtypes.float32, "U8": dtypes.uint8, "I8": dtypes.int8, "I32": dtypes.int32, "I64": dtypes.int64,
@@ -2,7 +2,8 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Union, Type, Tuple, Any, List, Dict, Callable, ClassVar
 import functools
 from enum import Enum, auto
-from tinygrad.helpers import dtypes, prod, DType, dedup
+from tinygrad.helpers import prod, dedup
+from tinygrad.dtype import dtypes, DType
 from tinygrad.shape.symbolic import Variable
 from dataclasses import dataclass
 
@@ -3,7 +3,8 @@ import math, functools
 from collections import defaultdict, Counter
 from tinygrad.codegen.linearizer import UOps, UOp
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps
-from tinygrad.helpers import ImageDType, dtypes, prod, DType, PtrDType, strip_parens, getenv
+from tinygrad.helpers import prod, strip_parens, getenv
+from tinygrad.dtype import ImageDType, dtypes, DType, PtrDType
 
 class CStyleLanguage(NamedTuple):
   size_prefix: str = "int"
@@ -1,7 +1,7 @@
 from typing import Final, Dict, Callable, Any, List, Optional, Tuple
 from llvmlite import ir
 from tinygrad.codegen.linearizer import UOps, UOp
-from tinygrad.helpers import DType, PtrDType, dtypes
+from tinygrad.dtype import DType, PtrDType, dtypes
 from tinygrad.ops import Op, UnaryOps, BinaryOps, TernaryOps
 
 MFLAGS = ('nsz', 'arcp', 'contract', 'afn', 'reassoc') # All from fast math, but nnan and ninf
@@ -1,7 +1,8 @@
 from typing import List, Any, Dict, cast, Optional
 import numpy as np
 import Metal
-from tinygrad.helpers import dtypes, dedup, unwrap2
+from tinygrad.dtype import dtypes
+from tinygrad.helpers import dedup, unwrap2
 from tinygrad.device import Buffer, CompiledASTRunner, update_stats
 from tinygrad.jit import JitItem, get_input_replace, get_jit_stats, get_jc_idxs_with_updatable_launch_dims, GraphException
 from tinygrad.shape.symbolic import Variable
@@ -1,6 +1,7 @@
 import os, mmap, _posixshmem
 from typing import Callable, Dict, Tuple
-from tinygrad.helpers import prod, DType, OSX, dtypes
+from tinygrad.dtype import DType, dtypes
+from tinygrad.helpers import prod, OSX
 from tinygrad.device import Interpreted, Allocator
 from tinygrad.ops import Op, MovementOps, UnaryOps
 from tinygrad.shape.view import strides_for_shape
@@ -2,7 +2,8 @@ from __future__ import annotations
 from typing import Tuple, Optional, List
 import ctypes, functools
 import gpuctypes.opencl as cl
-from tinygrad.helpers import init_c_var, to_char_p_p, from_mv, OSX, ImageDType, DEBUG
+from tinygrad.helpers import init_c_var, to_char_p_p, from_mv, OSX, DEBUG
+from tinygrad.dtype import ImageDType
 from tinygrad.codegen.kernel import LinearizerOptions
 from tinygrad.renderer.cstyle import OpenCLRenderer
 from tinygrad.device import Compiled, LRUAllocator
@@ -3,7 +3,8 @@ import numpy as np
 from typing import Dict, Callable
 from tinygrad.ops import BufferOps, UnaryOps, BinaryOps, TernaryOps, ReduceOps, MovementOps, Op
 from tinygrad.device import Interpreted, Allocator
-from tinygrad.helpers import getenv, dtypes, flatten
+from tinygrad.dtype import dtypes
+from tinygrad.helpers import getenv, flatten
 from tinygrad.runtime.ops_cpu import einsum_mulacc, reduce_axis
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else ("mps" if getenv("MPS", 0) else "cpu"))
@@ -7,7 +7,7 @@ from functools import partialmethod, reduce
 from itertools import accumulate
 import numpy as np
 
-from tinygrad.helpers import DType, dtypes, ImageDType, least_upper_float, least_upper_dtype
+from tinygrad.dtype import DType, dtypes, ImageDType, least_upper_float, least_upper_dtype
 from tinygrad.helpers import argfix, make_pair, getenv, IMAGE, DEBUG, flatten, prod, all_int, round_up, merge_dicts, fully_flatten
 from tinygrad.lazy import LazyBuffer, create_schedule
 from tinygrad.ops import LoadOps
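tensor.py is the main consumer of the promotion helpers: binary ops upcast their operands through least_upper_dtype. A hedged sketch of the end-to-end effect, assuming this commit's promotion rules:

```python
from tinygrad import Tensor
from tinygrad.dtype import dtypes, least_upper_dtype

a = Tensor([1, 2, 3], dtype=dtypes.int32)
b = Tensor([1.0, 2.0, 3.0], dtype=dtypes.float32)

# int32 + float32 promotes to float32, matching least_upper_dtype
assert least_upper_dtype(a.dtype, b.dtype) == dtypes.float32
assert (a + b).dtype == dtypes.float32
```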