# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# examples/openpilot/compile2.py and examples/openpilot/compile3.py:
#   Add one line next to the existing FLOAT16 / IMAGE / NOLOCALS defaults that sets
#   os.environ["NATIVE_MATH"] = "1" when the variable is not already present in os.environ.
#   A value already set by the caller is left untouched.
#
# tinygrad/renderer/cstyle.py:
#   Adds a `code_for_op` attribute to OpenCLRenderer. When getenv("NATIVE_MATH") is truthy it is a
#   copy of CStyleLanguage().code_for_op with UnaryOps.SIN overridden to render "native_sin(<x>)";
#   otherwise it is CStyleLanguage().code_for_op as-is. The conditional expression applies to the
#   whole dict literal, not just to the SIN entry.
#   The SQRT/RECIP and EXP2/LOG2 entries sit on added lines that begin with "#", so they are
#   commented out and only the SIN override is active.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed onto a single line;
#   the original newlines (including blank context lines) must be restored before it can be applied.
# NOTE(review): getenv("NATIVE_MATH") is read inside the class body, so it is presumably evaluated
#   once when cstyle.py is imported; confirm the variable is set before that import (both example
#   scripts in this patch set their default ahead of their tinygrad imports).
# NOTE(review): CStyleLanguage() is constructed in both branches of the conditional; consider
#   building the base table once. Also decide whether the commented-out native_* entries should be
#   enabled or removed before merging.
# NOTE(review): OpenCL native_* built-ins presumably trade accuracy for speed; confirm native_sin
#   precision/range is acceptable for the models compiled by these scripts.
diff --git a/examples/openpilot/compile2.py b/examples/openpilot/compile2.py index f907097a60..811a5fc240 100644 --- a/examples/openpilot/compile2.py +++ b/examples/openpilot/compile2.py @@ -6,6 +6,7 @@ sys.path.insert(0, str(pathlib.Path(__file__).parents[1])) if "FLOAT16" not in os.environ: os.environ["FLOAT16"] = "1" if "IMAGE" not in os.environ: os.environ["IMAGE"] = "2" if "NOLOCALS" not in os.environ: os.environ["NOLOCALS"] = "1" +if "NATIVE_MATH" not in os.environ: os.environ["NATIVE_MATH"] = "1" OPENPILOT_MODEL = "https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx" diff --git a/examples/openpilot/compile3.py b/examples/openpilot/compile3.py index a04c3c2652..5dbefc8398 100644 --- a/examples/openpilot/compile3.py +++ b/examples/openpilot/compile3.py @@ -3,6 +3,7 @@ import numpy as np if "FLOAT16" not in os.environ: os.environ["FLOAT16"] = "1" if "IMAGE" not in os.environ: os.environ["IMAGE"] = "2" if "NOLOCALS" not in os.environ: os.environ["NOLOCALS"] = "1" +if "NATIVE_MATH" not in os.environ: os.environ["NATIVE_MATH"] = "1" from tinygrad import fetch, Tensor, TinyJit, Device, Context, GlobalCounters from tinygrad.helpers import OSX, DEBUG, Timing diff --git a/tinygrad/renderer/cstyle.py b/tinygrad/renderer/cstyle.py index f746d032a0..fe58b95d24 100644 --- a/tinygrad/renderer/cstyle.py +++ b/tinygrad/renderer/cstyle.py @@ -227,6 +227,11 @@ class ClangRenderer(CStyleLanguage): class OpenCLRenderer(CStyleLanguage): device = "GPU" + code_for_op = {**CStyleLanguage().code_for_op, + #UnaryOps.SQRT: lambda x,dtype: f"native_sqrt({x})", UnaryOps.RECIP: lambda x,dtype: f"native_recip({x})", + #UnaryOps.EXP2: lambda x,dtype: f"native_exp2({x})", UnaryOps.LOG2: lambda x,dtype: f"native_log2({x})", + UnaryOps.SIN: lambda x,dtype: f"native_sin({x})"} if getenv("NATIVE_MATH") else CStyleLanguage().code_for_op + # language options kernel_prefix = "__kernel " buffer_prefix = "__global "