mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
fixup openpilot/compile.py
This commit is contained in:
@@ -6,7 +6,10 @@ import os
|
||||
import time
|
||||
import io
|
||||
|
||||
if not int(os.getenv("NOIMAGE", 0)):
|
||||
os.environ['GRAPH'] = '1'
|
||||
os.environ['OPT'] = '99'
|
||||
os.environ['STRONG_CACHE'] = '1'
|
||||
if os.getenv("GPU", None) is None:
|
||||
os.environ['OPENCL'] = '1'
|
||||
|
||||
DEBUGCL = int(os.getenv("DEBUGCL", 0))
|
||||
@@ -16,6 +19,7 @@ import numpy as np
|
||||
|
||||
import tinygrad.ops as ops
|
||||
|
||||
from tinygrad.llops import ops_gpu
|
||||
from tinygrad.llops.ops_gpu import CL
|
||||
from extra.utils import fetch
|
||||
from extra.onnx import get_run_onnx
|
||||
@@ -41,6 +45,7 @@ def get_random_input_tensors():
|
||||
return inputs, np_inputs
|
||||
|
||||
if __name__ == "__main__":
|
||||
Tensor.no_grad = True
|
||||
ops.GRAPH = False
|
||||
|
||||
dat = fetch(OPENPILOT_MODEL)
|
||||
@@ -65,6 +70,8 @@ if __name__ == "__main__":
|
||||
|
||||
CL.CACHE = []
|
||||
ops.GRAPH = True
|
||||
ops_gpu.DEBUG = 2
|
||||
CL.kernel_count = -1
|
||||
tinygrad_out.realize()
|
||||
ops.GRAPH = False
|
||||
print("kernel count:", len(CL.CACHE))
|
||||
|
||||
@@ -22,7 +22,7 @@ class CLBuffer:
|
||||
else: CL.mem_used -= self.cl.size
|
||||
|
||||
class CL:
|
||||
CACHE, kernel_count, mem_used = None, 0, 0
|
||||
CACHE, kernel_count, mem_used = None, -1, 0
|
||||
BUFFER_CACHE : Dict[int, List[cl.Buffer]] = defaultdict(list)
|
||||
cl_ctx : Optional[cl.Context] = None
|
||||
cl_queue : Optional[cl.CommandQueue] = None
|
||||
@@ -51,7 +51,7 @@ class CLProgram:
|
||||
def __call__(self, *args):
|
||||
CL.kernel_count += 1
|
||||
if CL.CACHE is not None: CL.CACHE.append((self, args))
|
||||
else: e = self.clprg(CL().cl_queue, *args)
|
||||
e = self.clprg(CL().cl_queue, *args)
|
||||
if DEBUG >= 2: CL.cl_queue.finish()
|
||||
if DEBUG >= 1:
|
||||
print(f"**CL** {CL.kernel_count:6d} {self.name:20s} args {len(args[2:]):5d} size {prod(args[0]):8d} kernels {str(args[0]):20s} {str(args[1]):20s}" + \
|
||||
|
||||
@@ -273,7 +273,8 @@ class LazyBuffer:
|
||||
# TODO: these can be properties on the device buffer
|
||||
from accel.opencl.preprocessing import preprocessing_op, postprocessing_op # type: ignore
|
||||
x,w,Cn = preprocessing_op(x, w, C)
|
||||
ret = LazyBuffer(x.device, C.out_shape, ProcessingOps, LazyOp(op, (x, w), C))
|
||||
w.realize().image
|
||||
ret = LazyBuffer(x.device, Cn.out_shape, ProcessingOps, LazyOp(op, (x, w), Cn))
|
||||
return postprocessing_op(ret, Cn, C)
|
||||
else:
|
||||
return LazyBuffer(x.device, C.out_shape, ProcessingOps, LazyOp(op, (x, w), C))
|
||||
|
||||
Reference in New Issue
Block a user