diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a7bfb78a2f..f37a652f4d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -428,7 +428,7 @@ jobs: cache-name: cache-gpuocelot-build with: path: ${{ github.workspace }}/gpuocelot/ocelot - key: ubuntu22.04-gpuocelot-18401f4245b27ca4b3af433196583cc81ef84480-rebuild-2 + key: ubuntu22.04-gpuocelot-18401f4245b27ca4b3af433196583cc81ef84480-rebuild-3 - name: Clone/compile gpuocelot if: (matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton') && steps.cache-build.outputs.cache-hit != 'true' run: | diff --git a/autogen_stubs.sh b/autogen_stubs.sh index 922ac28e7e..82b71f8b23 100755 --- a/autogen_stubs.sh +++ b/autogen_stubs.sh @@ -15,10 +15,15 @@ fi BASE=tinygrad/runtime/autogen/ +fixup() { + sed -i '1s/^/# mypy: ignore-errors\n/' $1 + sed -i 's/ *$//' $1 + grep FIXME_STUB $1 || true +} + generate_opencl() { clang2py /usr/include/CL/cl.h -o $BASE/opencl.py -l /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 -k cdefstum - sed -i '1s/^/# mypy: ignore-errors\n/' $BASE/opencl.py - grep FIXME_STUB $BASE/opencl.py || true + fixup $BASE/opencl.py # hot patches sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/opencl.py sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libOpenCL.so.1')\ctypes.CDLL(ctypes.util.find_library('OpenCL'))\g" $BASE/opencl.py @@ -31,8 +36,7 @@ generate_hip() { --clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -x c++" -o $BASE/hip.py -l /opt/rocm/lib/libamdhip64.so echo "hipDeviceProp_t = hipDeviceProp_tR0600" >> $BASE/hip.py echo "hipGetDeviceProperties = hipGetDevicePropertiesR0600" >> $BASE/hip.py - sed -i '1s/^/# mypy: ignore-errors\n/' $BASE/hip.py - grep FIXME_STUB $BASE/hip.py || true + fixup $BASE/hip.py # we can trust HIP is always at /opt/rocm/lib #sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/hip.py #sed -i "s\ctypes.CDLL('/opt/rocm/lib/libhiprtc.so')\ctypes.CDLL(ctypes.util.find_library('hiprtc'))\g" $BASE/hip.py @@ -41,8 +45,7 @@ generate_hip() { clang2py /opt/rocm/include/amd_comgr/amd_comgr.h \ --clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -x c++" -o $BASE/comgr.py -l /opt/rocm/lib/libamd_comgr.so - sed -i '1s/^/# mypy: ignore-errors\n/' $BASE/comgr.py - grep FIXME_STUB $BASE/comgr.py || true + fixup $BASE/comgr.py python3 -c "import tinygrad.runtime.autogen.comgr" } @@ -51,8 +54,7 @@ generate_cuda() { sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')\ctypes.CDLL(ctypes.util.find_library('cuda'))\g" $BASE/cuda.py sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libnvrtc.so')\ctypes.CDLL(ctypes.util.find_library('nvrtc'))\g" $BASE/cuda.py - sed -i '1s/^/# mypy: ignore-errors\n/' $BASE/cuda.py - grep FIXME_STUB $BASE/cuda.py || true + fixup $BASE/cuda.py python3 -c "import tinygrad.runtime.autogen.cuda" } diff --git a/tinygrad/runtime/autogen/hip.py b/tinygrad/runtime/autogen/hip.py index a50237dad2..b2e1a7de8d 100644 --- a/tinygrad/runtime/autogen/hip.py +++ b/tinygrad/runtime/autogen/hip.py @@ -131,7 +131,7 @@ class FunctionFactoryStub: # libraries['FIXME_STUB'] explanation # As you did not list (-l libraryname.so) a library that exports this function -# This is a non-working stub instead. +# This is a non-working stub instead. # You can either re-run clan2py with -l /path/to/library.so # Or manually fix this by comment the ctypes.CDLL loading _libraries = {} diff --git a/tinygrad/runtime/autogen/opencl.py b/tinygrad/runtime/autogen/opencl.py index cd74a0869e..e2b73a8660 100644 --- a/tinygrad/runtime/autogen/opencl.py +++ b/tinygrad/runtime/autogen/opencl.py @@ -614,13 +614,13 @@ CL_VERSION_PATCH_BITS = (12) # macro # CL_VERSION_MINOR_MASK = ((1<<(10)) # macro # CL_VERSION_PATCH_MASK = ((1<<(12)) # macro # def CL_VERSION_MAJOR(version): # macro -# return ((version)>>((10)+(12))) +# return ((version)>>((10)+(12))) # def CL_VERSION_MINOR(version): # macro -# return (((version)>>(12))&((1<<(10))) +# return (((version)>>(12))&((1<<(10))) # def CL_VERSION_PATCH(version): # macro -# return ((version)&((1<<(12))) +# return ((version)&((1<<(12))) # def CL_MAKE_VERSION(major, minor, patch): # macro -# return ((((major)&((1<<(10)))<<((10)+(12)))|(((minor)&((1<<(10)))<<(12))|((patch)&((1<<(12)))) +# return ((((major)&((1<<(10)))<<((10)+(12)))|(((minor)&((1<<(10)))<<(12))|((patch)&((1<<(12)))) class struct__cl_platform_id(Structure): pass