fixup openpilot/compile.py

This commit is contained in:
George Hotz
2022-07-11 13:59:09 -07:00
parent 5e46561f7e
commit d651caa864
3 changed files with 12 additions and 4 deletions

View File

@@ -6,7 +6,10 @@ import os
import time
import io
if not int(os.getenv("NOIMAGE", 0)):
os.environ['GRAPH'] = '1'
os.environ['OPT'] = '99'
os.environ['STRONG_CACHE'] = '1'
if os.getenv("GPU", None) is None:
os.environ['OPENCL'] = '1'
DEBUGCL = int(os.getenv("DEBUGCL", 0))
@@ -16,6 +19,7 @@ import numpy as np
import tinygrad.ops as ops
from tinygrad.llops import ops_gpu
from tinygrad.llops.ops_gpu import CL
from extra.utils import fetch
from extra.onnx import get_run_onnx
@@ -41,6 +45,7 @@ def get_random_input_tensors():
return inputs, np_inputs
if __name__ == "__main__":
Tensor.no_grad = True
ops.GRAPH = False
dat = fetch(OPENPILOT_MODEL)
@@ -65,6 +70,8 @@ if __name__ == "__main__":
CL.CACHE = []
ops.GRAPH = True
ops_gpu.DEBUG = 2
CL.kernel_count = -1
tinygrad_out.realize()
ops.GRAPH = False
print("kernel count:", len(CL.CACHE))

View File

@@ -22,7 +22,7 @@ class CLBuffer:
else: CL.mem_used -= self.cl.size
class CL:
CACHE, kernel_count, mem_used = None, 0, 0
CACHE, kernel_count, mem_used = None, -1, 0
BUFFER_CACHE : Dict[int, List[cl.Buffer]] = defaultdict(list)
cl_ctx : Optional[cl.Context] = None
cl_queue : Optional[cl.CommandQueue] = None
@@ -51,7 +51,7 @@ class CLProgram:
def __call__(self, *args):
CL.kernel_count += 1
if CL.CACHE is not None: CL.CACHE.append((self, args))
else: e = self.clprg(CL().cl_queue, *args)
e = self.clprg(CL().cl_queue, *args)
if DEBUG >= 2: CL.cl_queue.finish()
if DEBUG >= 1:
print(f"**CL** {CL.kernel_count:6d} {self.name:20s} args {len(args[2:]):5d} size {prod(args[0]):8d} kernels {str(args[0]):20s} {str(args[1]):20s}" + \

View File

@@ -273,7 +273,8 @@ class LazyBuffer:
# TODO: these can be properties on the device buffer
from accel.opencl.preprocessing import preprocessing_op, postprocessing_op # type: ignore
x,w,Cn = preprocessing_op(x, w, C)
ret = LazyBuffer(x.device, C.out_shape, ProcessingOps, LazyOp(op, (x, w), C))
w.realize().image
ret = LazyBuffer(x.device, Cn.out_shape, ProcessingOps, LazyOp(op, (x, w), Cn))
return postprocessing_op(ret, Cn, C)
else:
return LazyBuffer(x.device, C.out_shape, ProcessingOps, LazyOp(op, (x, w), C))