create engine folder and move code (#3948)

* retry

* older tf

* that

Author: George Hotz
Committed by: GitHub
Date: 2024-03-26 20:38:03 -07:00
commit 150ea2eb76 (parent 629cbc5587)

51 changed files with 54 additions and 54 deletions
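Most of the diff is a mechanical import rename: tinygrad.features.jit becomes tinygrad.engine.jit and tinygrad.realize becomes tinygrad.engine.realize, with every call site updated; the only other changes are packaging (the new tinygrad.engine package) and the TensorFlow pin noted in the commit message. A minimal migration sketch, assuming the public names are unchanged; the step function below is a hypothetical example for illustration, not code from this commit:

# old -> new module paths in this commit:
#   tinygrad.features.jit -> tinygrad.engine.jit      (TinyJit, CacheCollector, JitItem, ...)
#   tinygrad.realize      -> tinygrad.engine.realize  (create_schedule, run_schedule, lower_schedule_item)
from tinygrad import Tensor
from tinygrad.engine.jit import TinyJit

@TinyJit
def step(x: Tensor) -> Tensor:  # hypothetical jitted function, not from the commit
  return (x * 2).realize()      # realize inside the jitted function, as the updated examples do

for _ in range(3): step(Tensor.rand(4, 4))  # repeated same-shaped calls so the JIT can capture and replay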

View File

@@ -73,7 +73,7 @@ assert out.as_buffer().cast('I')[0] == 5
print("******** third, the LazyBuffer ***********")
from tinygrad.lazy import LazyBuffer, LoadOps
-from tinygrad.realize import run_schedule, create_schedule
+from tinygrad.engine.realize import run_schedule, create_schedule
# allocate some values + load in values
a = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, DEVICE)

View File

@@ -7,7 +7,7 @@ from tinygrad.nn.state import get_parameters
from tinygrad.nn import optim
from tinygrad import Tensor, GlobalCounters
from tinygrad.helpers import getenv
-from tinygrad.features.jit import CacheCollector
+from tinygrad.engine.jit import CacheCollector
def tensors_allocated():
return sum(isinstance(x, Tensor) for x in gc.get_objects())

View File

@@ -9,7 +9,7 @@ import numpy as np
from PIL import Image
from tinygrad.tensor import Tensor
from tinygrad.helpers import getenv, fetch, Timing
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from extra.models.efficientnet import EfficientNet
np.set_printoptions(suppress=True)

View File

@@ -8,7 +8,7 @@ from tinygrad.features.search import time_linearizer, beam_search, bufs_from_lin
from tinygrad.helpers import ansilen, DEBUG, getenv
from tinygrad.shape.symbolic import sym_infer
from tinygrad.dtype import dtypes
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
if __name__ == "__main__":
if getenv("HALF"):

View File

@@ -3,7 +3,7 @@ start = time.perf_counter()
from pathlib import Path
import numpy as np
from tinygrad import Tensor, Device, dtypes, GlobalCounters
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.nn.state import get_parameters, load_state_dict, safe_load
from tinygrad.helpers import getenv, Timing
from examples.mlperf import helpers
@@ -103,7 +103,7 @@ def eval_retinanet():
coco_eval = COCOeval(coco, iouType="bbox")
coco_evalimgs, evaluated_imgs, ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng)
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
mdlrun = TinyJit(lambda x: mdl(input_fixup(x)).realize())
n, bs = 0, 8

View File

@@ -13,7 +13,7 @@ from tinygrad import Device, GlobalCounters, dtypes, Tensor
from tinygrad.helpers import Timing, Context, getenv, fetch, colored
from tinygrad.nn import Conv2d, Linear, GroupNorm, LayerNorm, Embedding
from tinygrad.nn.state import torch_load, load_state_dict, get_state_dict
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
class AttnBlock:
def __init__(self, in_channels):

View File

@@ -9,7 +9,7 @@ from tinygrad import nn, dtypes
from tinygrad.helpers import fetch
from tinygrad.nn.state import torch_load
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from unidecode import unidecode
LRELU_SLOPE = 0.1

View File

@@ -6,7 +6,7 @@ import base64
import multiprocessing
import numpy as np
from typing import Optional, Union, Literal, List
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.nn.state import torch_load, load_state_dict
from tinygrad.helpers import getenv, DEBUG, CI, fetch
import tinygrad.nn as nn

View File

@@ -3,7 +3,7 @@ from tinygrad.ops import LoadOps
from tinygrad.codegen.linearizer import Linearizer
from test.external.fuzz_linearizer import run_linearizer
from tinygrad.codegen.kernel import Opt, OptOps
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
N = 17**3

View File

@@ -91,7 +91,7 @@ def pad_input(volume, roi_shape, strides, padding_mode="constant", padding_val=-
return F.pad(torch.from_numpy(volume), paddings, mode=padding_mode, value=padding_val).numpy(), paddings
def sliding_window_inference(model, inputs, labels, roi_shape=(128, 128, 128), overlap=0.5):
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
mdl_run = TinyJit(lambda x: model(x).realize())
image_shape, dim = list(inputs.shape[2:]), len(inputs.shape[2:])
strides = [int(roi_shape[i] * (1 - overlap)) for i in range(dim)]

View File

@@ -1,7 +1,7 @@
from typing import Tuple, Dict, List
from tinygrad.dtype import DType
from tinygrad.tensor import Device, Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.nn.state import get_state_dict
from tinygrad.dtype import dtypes
import json

View File

@@ -32,7 +32,7 @@ except RuntimeError:
print("no torch metal conv")
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad import Device
b = Tensor(nb)
c = Tensor(nc)

View File

@@ -116,7 +116,7 @@ tm = min([torch_prog(b, c) for _ in range(20)])
print(f"{N*N:10d} {tm*1e6:9.2f} us, would be {FLOPS*1e-9/tm:9.2f} GFLOPS matmul in torch")
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
b = Tensor(nb)
c = Tensor(nc)
# TODO: slowness without the JIT I suspect comes from a lack of a caching allocator

View File

@@ -4,7 +4,7 @@ import numpy as np
import time, torch, torch.mps
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad import Device, GlobalCounters, dtypes
from tinygrad.helpers import colored, getenv, CI, flat_mv
@@ -108,7 +108,7 @@ metalalloc.copyout(flat_mv(metal_a.data), a)
np.testing.assert_allclose(metal_a, torch_a, atol=5e-3)
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
b = Tensor(nb)
c = Tensor(nc)
# TODO: slowness without the JIT I suspect comes from a lack of a caching allocator

View File

@@ -30,7 +30,7 @@ except ImportError:
import os
from tinygrad.tensor import Tensor
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
# define the compute
A = Tensor.rand(M, K, device="clang")

View File

@@ -1,5 +1,5 @@
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.nn import Linear, Embedding
from tinygrad.helpers import fetch
import numpy as np

View File

@@ -2,7 +2,7 @@ import numpy as np
from tqdm import trange
from tinygrad.tensor import Tensor
from tinygrad.helpers import CI
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=lambda out,y: out.sparse_categorical_crossentropy(y),

View File

@@ -16,7 +16,7 @@ from extra.onnx import get_run_onnx
from tinygrad import Tensor, Device, GlobalCounters, dtypes
from tinygrad.dtype import ImageDType
from tinygrad.helpers import partition, Context, fetch, getenv, GRAPH, DEBUG
-from tinygrad.realize import run_schedule, lower_schedule_item, create_schedule
+from tinygrad.engine.realize import run_schedule, lower_schedule_item, create_schedule
from tinygrad.ops import LoadOps, ScheduleItem
Device.DEFAULT = "GPU"

View File

@@ -14,7 +14,7 @@ setup(name='tinygrad',
license='MIT',
long_description=long_description,
long_description_content_type='text/markdown',
-packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer',
+packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
'tinygrad.runtime', 'tinygrad.runtime.driver', 'tinygrad.runtime.graph', 'tinygrad.shape', 'tinygrad.features'],
classifiers=[
"Programming Language :: Python :: 3",
@@ -54,7 +54,7 @@ setup(name='tinygrad',
"hypothesis",
],
'testing_tf': [
"tensorflow",
"tensorflow==2.15.1",
"tensorflow_addons",
]
},
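setuptools only bundles packages that are listed explicitly, so the new tinygrad.engine directory is added to packages here; without it an installed wheel would be missing the module and the renamed imports above would fail at runtime. A quick post-install check, a hypothetical snippet rather than part of the commit:

# sanity-check that the new engine package ships with an installed tinygrad
import importlib
for mod in ("tinygrad.engine.jit", "tinygrad.engine.realize"):
  print(mod, "->", importlib.import_module(mod).__file__)

The tensorflow==2.15.1 pin matches the "older tf" note in the commit message and only affects the optional testing_tf extra.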

View File

@@ -3,8 +3,8 @@ from tinygrad import Tensor, Device
from tinygrad.lazy import LazyBuffer
from tinygrad.ops import ReduceOps, GlobalCounters
from tinygrad.features.multi import MultiLazyBuffer, all_reduce
-from tinygrad.features.jit import TinyJit
-from tinygrad.realize import create_schedule, run_schedule
+from tinygrad.engine.jit import TinyJit
+from tinygrad.engine.realize import create_schedule, run_schedule
from tinygrad.helpers import getenv, Context, RING
from typing import List, Union

View File

@@ -66,7 +66,7 @@ def benchmark_model(m, devices, validate_outs=False):
tinygrad_model = get_run_onnx(onnx_model)
benchmark(m, f"tinygrad_{device.lower()}_jitless", lambda: {k:v.numpy() for k,v in tinygrad_model(inputs).items()})
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
tinygrad_jitted_model = TinyJit(lambda **kwargs: {k:v.realize() for k,v in tinygrad_model(kwargs).items()})
for _ in range(3): {k:v.numpy() for k,v in tinygrad_jitted_model(**inputs).items()}
benchmark(m, f"tinygrad_{device.lower()}_jit", lambda: {k:v.numpy() for k,v in tinygrad_jitted_model(**inputs).items()}) # noqa: F821

View File

@@ -2,7 +2,7 @@ import time, unittest
from tinygrad.runtime.driver.hip_comgr import compile_hip
from tinygrad import Tensor
from tinygrad.device import Device
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad.codegen.linearizer import Linearizer
class TestHIPCompileSpeed(unittest.TestCase):

View File

@@ -4,7 +4,7 @@ from tinygrad.device import Device, Buffer, BufferXfer
from tinygrad.dtype import dtypes
from tinygrad.runtime.driver.hsa import AQLQueue
from tinygrad.runtime.graph.hsa import VirtAQLQueue, HSAGraph
-from tinygrad.features.jit import JitItem
+from tinygrad.engine.jit import JitItem
def get_hsa_inc_prog(dev, inc=1):
prg = f"""

View File

@@ -2,7 +2,7 @@
import unittest
import numpy as np
from tinygrad import Tensor, dtypes
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.helpers import CI
from test.helpers import derandomize_model

View File

@@ -17,7 +17,7 @@ from tinygrad.helpers import getenv
from tinygrad.nn import optim
#from tinygrad.lazy import PUSH_PERMUTES
PUSH_PERMUTES = False
-from tinygrad.features.jit import CacheCollector
+from tinygrad.engine.jit import CacheCollector
class CLCache:
def __init__(self, allowed=None, strict=False, preclear=True, var_vals=None):

View File

@@ -4,7 +4,7 @@ from tinygrad.tensor import Tensor
from tinygrad.codegen.linearizer import Linearizer
from tinygrad.renderer.cstyle import OpenCLRenderer
from tinygrad.features.graph import graph_uops
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad.nn import Conv2d
class TestUopsGraph(unittest.TestCase):

View File

@@ -2,7 +2,7 @@ import unittest, time, gc
import numpy as np
from tinygrad.nn import optim
from tinygrad.nn.state import get_parameters
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad import Tensor, Device, GlobalCounters, dtypes
from tinygrad.helpers import CI, getenv
from tinygrad.shape.symbolic import Variable

View File

@@ -3,7 +3,7 @@ import unittest
from tinygrad.tensor import Tensor
from tinygrad.ops import LoadOps
from tinygrad.nn import Conv2d
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
class TestConvShapetracker(unittest.TestCase):
def test_conv_3x3_one_view(self):

View File

@@ -87,7 +87,7 @@ class TestCustomFunction(unittest.TestCase):
@unittest.skipIf(Device.DEFAULT in ["CPU"], "atan2_cpu not jittable")
def test_atan2_jit(self):
# custom ops even work in the JIT!
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
@TinyJit
def jitted_atan2(a:Tensor, b:Tensor) -> Tensor:

View File

@@ -6,7 +6,7 @@ import numpy as np
from hypothesis import given, strategies as strat, settings
from tinygrad.dtype import DType
from tinygrad.helpers import CI, getenv
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad.ops import UnaryOps, get_lazyop_info
from test.helpers import is_dtype_supported

View File

@@ -2,7 +2,7 @@ import unittest
import time
import numpy as np
from tinygrad import Tensor, dtypes
-from tinygrad.realize import run_schedule, create_schedule, lower_schedule_item
+from tinygrad.engine.realize import run_schedule, create_schedule, lower_schedule_item
class TestFusionOp(unittest.TestCase):
def test_contiguous_add(self):

View File

@@ -4,7 +4,7 @@ import numpy as np
from test.helpers import assert_jit_cache_len
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.device import Device
from tinygrad.helpers import CI

View File

@@ -3,7 +3,7 @@ import numpy as np
import unittest
from tinygrad import Tensor, Device, dtypes
from tinygrad.lazy import LazyBuffer, ReduceOps
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
class TestLazyBuffer(unittest.TestCase):
def test_fromcpu_shape_tracker(self):

View File

@@ -1,6 +1,6 @@
import unittest
from tinygrad.tensor import Tensor
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
# stuff needed to unpack a kernel
# ruff: noqa: F401

View File

@@ -9,8 +9,8 @@ from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.view import View
from tinygrad.shape.symbolic import MulNode, Variable, NumNode, Node
from tinygrad.tensor import Tensor
-from tinygrad.features.jit import CacheCollector
-from tinygrad.realize import create_schedule, run_schedule
+from tinygrad.engine.jit import CacheCollector
+from tinygrad.engine.realize import create_schedule, run_schedule
from tinygrad.helpers import prod, Context
from tinygrad.dtype import DType, dtypes
from tinygrad.codegen.uops import UOpGraph

View File

@@ -5,7 +5,7 @@ from tinygrad.device import BufferCopy
from tinygrad.ops import LoadOps, ReduceOps
from tinygrad.helpers import CI, prod, Context
from tinygrad.nn.state import get_parameters, get_state_dict
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad.features.multi import all_reduce, MultiLazyBuffer
from random import randint
import numpy as np

View File

@@ -9,7 +9,7 @@ from tinygrad.ops import LoadOps
from tinygrad.helpers import DEBUG, GRAPH
from tinygrad.codegen.linearizer import Linearizer
from tinygrad.features.graph import print_tree, realized_lazybuffer
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad import nn, dtypes
def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_loadops=True):

View File

@@ -1,7 +1,7 @@
import unittest
from tinygrad.codegen.linearizer import Linearizer
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad.features.search import time_linearizer, bufs_from_lin
from tinygrad.device import Device, Buffer
from tinygrad.ops import LoadOps

View File

@@ -13,7 +13,7 @@ from tinygrad import Device, GlobalCounters
from tinygrad.tensor import Tensor
from tinygrad.nn import Conv2d
from tinygrad.helpers import colored, getenv, CI
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")]

View File

@@ -1,7 +1,7 @@
import unittest
from test.helpers import assert_jit_cache_len
-from tinygrad.features.jit import TinyJit
+from tinygrad.engine.jit import TinyJit
from tinygrad.shape.symbolic import Variable
from tinygrad.tensor import Tensor
import numpy as np

View File

@@ -5,7 +5,7 @@ from tinygrad.tensor import Tensor
from tinygrad.dtype import dtypes, DType, PtrDType
from tinygrad.device import Buffer, Device, CompiledASTRunner
from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
from tinygrad.codegen.linearizer import UOps, UOp
from tinygrad.codegen.uops import exec_alu, UOpGraph
from test.helpers import is_dtype_supported

View File

@@ -1,6 +1,6 @@
import unittest
from tinygrad import Tensor
-from tinygrad.realize import create_schedule, lower_schedule_item
+from tinygrad.engine.realize import create_schedule, lower_schedule_item
# TODO: can copy this in here when we remove it
#from tinygrad.ops import get_lazyop_info

View File

@@ -3,7 +3,7 @@ from tinygrad import Tensor, GlobalCounters
from tinygrad.helpers import Timing, CI, Profiling, WINO, DEBUG
from tinygrad.ops import LoadOps
from tinygrad.codegen.linearizer import Linearizer
-from tinygrad.realize import create_schedule
+from tinygrad.engine.realize import create_schedule
class TestWinograd(unittest.TestCase):
def setUp(self):

View File

@@ -1,5 +1,5 @@
from tinygrad.tensor import Tensor # noqa: F401
-from tinygrad.features.jit import TinyJit # noqa: F401
+from tinygrad.engine.jit import TinyJit # noqa: F401
from tinygrad.shape.symbolic import Variable # noqa: F401
from tinygrad.dtype import dtypes # noqa: F401
from tinygrad.ops import GlobalCounters # noqa: F401

View File

@@ -46,7 +46,7 @@ class JITRunner:
self.mem_estimate:sint = 0
def exec(self, rawbufs:List[Buffer], var_vals:Optional[Dict[Variable, int]]=None) -> Optional[float]:
var_vals = var_vals if var_vals is not None else {}
-from tinygrad.features.jit import CacheCollector
+from tinygrad.engine.jit import CacheCollector
et = self(rawbufs, var_vals)
if CACHECOLLECTING: CacheCollector.add(self, rawbufs, var_vals)
return et

View File

@@ -5,7 +5,7 @@ from tinygrad.helpers import init_c_var, GraphException
from tinygrad.device import CompiledASTRunner, update_stats, Buffer, MultiDeviceJITGraph, BufferXfer
from tinygrad.runtime.ops_cuda import CUDADevice, check, encode_args, cu_time_execution
from tinygrad.shape.symbolic import Variable
-from tinygrad.features.jit import JitItem, get_input_replace, get_jit_stats, \
+from tinygrad.engine.jit import JitItem, get_input_replace, get_jit_stats, \
get_jc_idxs_with_updatable_launch_dims, get_jc_idxs_with_updatable_var_vals
class CUDAGraph(MultiDeviceJITGraph):

View File

@@ -4,7 +4,7 @@ from tinygrad.helpers import GraphException, init_c_var, round_up
from tinygrad.device import Compiled, Buffer, BufferOptions, CompiledASTRunner, BufferXfer, MultiDeviceJITGraph, update_stats
from tinygrad.shape.symbolic import Variable
from tinygrad.runtime.ops_hsa import HSADevice, PROFILE, Profiler
-from tinygrad.features.jit import JitItem, get_input_replace, get_jit_stats, \
+from tinygrad.engine.jit import JitItem, get_input_replace, get_jit_stats, \
get_jc_idxs_with_updatable_launch_dims, get_jc_idxs_with_updatable_var_vals
import tinygrad.runtime.autogen.hsa as hsa
from tinygrad.runtime.driver.hsa import check, AQLQueue, AQL_PACKET_SIZE, EMPTY_SIGNAL

View File

@@ -3,7 +3,7 @@ import Metal
from tinygrad.dtype import dtypes
from tinygrad.helpers import dedup, unwrap2, GraphException
from tinygrad.device import Buffer, CompiledASTRunner, update_stats
-from tinygrad.features.jit import JitItem, get_input_replace, get_jit_stats, get_jc_idxs_with_updatable_launch_dims
+from tinygrad.engine.jit import JitItem, get_input_replace, get_jit_stats, get_jc_idxs_with_updatable_launch_dims
from tinygrad.shape.symbolic import Variable
from tinygrad.runtime.ops_metal import MetalDevice, wait_check

View File

@@ -14,7 +14,7 @@ from tinygrad.features.multi import MultiLazyBuffer
from tinygrad.ops import LoadOps
from tinygrad.device import Buffer, Device
from tinygrad.shape.symbolic import sint
-from tinygrad.realize import run_schedule, create_schedule
+from tinygrad.engine.realize import run_schedule, create_schedule
# **** start with two base classes, Tensor and Function ****