From fea20d71b302caa1807e7474727b2ccea30df60b Mon Sep 17 00:00:00 2001
From: Shawn Hagler <haglershawn@gmail.com>
Date: Sat, 30 Dec 2023 10:16:42 -0600
Subject: [PATCH] add `/opt/cuda/include` directory (#2920)

---
 tinygrad/runtime/ops_cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tinygrad/runtime/ops_cuda.py b/tinygrad/runtime/ops_cuda.py
index 0ea328f9f0..0874661efe 100644
--- a/tinygrad/runtime/ops_cuda.py
+++ b/tinygrad/runtime/ops_cuda.py
@@ -20,7 +20,7 @@ def check(status):
 def cu_time_execution(cb, enable=False) -> Optional[float]: return time_execution_cuda_style(cb, cuda.CUevent, cuda.cuEventCreate, cuda.cuEventRecord, cuda.cuEventSynchronize, cuda.cuEventDestroy_v2, cuda.cuEventElapsedTime, enable=enable) if not CUDACPU else cpu_time_execution(cb, enable=enable)  # noqa: E501
 
 @diskcache
-def compile_cuda(prg) -> bytes: return compile_cuda_style(prg, [f'--gpu-architecture={CUDADevice.default_arch_name}', "-I/usr/local/cuda/include", "-I/usr/include"], cuda.nvrtcProgram, cuda.nvrtcCreateProgram, cuda.nvrtcCompileProgram, cuda.nvrtcGetPTX, cuda.nvrtcGetPTXSize, cuda.nvrtcGetProgramLog, cuda.nvrtcGetProgramLogSize, check)  # noqa: E501
+def compile_cuda(prg) -> bytes: return compile_cuda_style(prg, [f'--gpu-architecture={CUDADevice.default_arch_name}', "-I/usr/local/cuda/include", "-I/usr/include", "-I/opt/cuda/include"], cuda.nvrtcProgram, cuda.nvrtcCreateProgram, cuda.nvrtcCompileProgram, cuda.nvrtcGetPTX, cuda.nvrtcGetPTXSize, cuda.nvrtcGetProgramLog, cuda.nvrtcGetProgramLogSize, check)  # noqa: E501
 
 class CUDAProgram:
   def __init__(self, device:CUDADevice, name:str, lib:bytes):