From 78a6af3da7041a618c34e1e5832bbe488904ec1f Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev Date: Sun, 13 Apr 2025 12:20:19 -0300 Subject: [PATCH] Use $CUDA_PATH/include for CUDA headers (#9858) --- docs/env_vars.md | 1 + tinygrad/runtime/support/compiler_cuda.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/env_vars.md b/docs/env_vars.md index b23b435675..bcd3a4fa0f 100644 --- a/docs/env_vars.md +++ b/docs/env_vars.md @@ -50,6 +50,7 @@ JIT | [0-2] | 0=disabled, 1=[jit enabled](quickstart.md#jit VIZ | [1] | 0=disabled, 1=[viz enabled](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/viz) ALLOW_TF32 | [1] | enable TensorFloat-32 tensor cores on Ampere or newer GPUs. WEBGPU_BACKEND | [WGPUBackendType_Metal, ...] | Force select a backend for WebGPU (Metal, DirectX, OpenGL, Vulkan...) +CUDA_PATH | str | Use `$CUDA_PATH/include` as the CUDA header directory for the CUDA and NV backends. If not set, tinygrad will use `/usr/local/cuda/include`, `/usr/include` and `/opt/cuda/include`. 
## Debug breakdown diff --git a/tinygrad/runtime/support/compiler_cuda.py b/tinygrad/runtime/support/compiler_cuda.py index e0bc06726c..14cd1a0823 100644 --- a/tinygrad/runtime/support/compiler_cuda.py +++ b/tinygrad/runtime/support/compiler_cuda.py @@ -4,7 +4,7 @@ from tinygrad.helpers import to_char_p_p, colored, init_c_var, getenv import tinygrad.runtime.autogen.nvrtc as nvrtc from tinygrad.device import Compiler, CompileError -PTX = getenv("PTX") # this shouldn't be here, in fact, it shouldn't exist +PTX, CUDA_PATH = getenv("PTX"), getenv("CUDA_PATH", "") # PTX shouldn't be here, in fact, it shouldn't exist def _get_bytes(arg, get_str, get_sz, check) -> bytes: sz = init_c_var(ctypes.c_size_t(), lambda x: check(get_sz(arg, ctypes.byref(x)))) @@ -40,7 +40,8 @@ def cuda_disassemble(lib, arch): class CUDACompiler(Compiler): def __init__(self, arch:str, cache_key:str="cuda"): - self.arch, self.compile_options = arch, [f'--gpu-architecture={arch}', "-I/usr/local/cuda/include", "-I/usr/include", "-I/opt/cuda/include/"] + self.arch, self.compile_options = arch, [f'--gpu-architecture={arch}'] + self.compile_options += [f"-I{CUDA_PATH}/include"] if CUDA_PATH else ["-I/usr/local/cuda/include", "-I/usr/include", "-I/opt/cuda/include"] nvrtc_check(nvrtc.nvrtcVersion((nvrtcMajor := ctypes.c_int()), (nvrtcMinor := ctypes.c_int()))) if (nvrtcMajor.value, nvrtcMinor.value) >= (12, 4): self.compile_options.append("--minimal") super().__init__(f"compile_{cache_key}_{self.arch}")