From 58206fa8a971891adbcf4b13b730770a0e628d75 Mon Sep 17 00:00:00 2001
From: b1tg <33436708+b1tg@users.noreply.github.com>
Date: Fri, 21 Mar 2025 23:13:27 +0800
Subject: [PATCH] add amd llvm compiler (#9519)

Co-authored-by: b1tg
Co-authored-by: chenyu
---
 .github/workflows/test.yml               | 11 ++++++++
 test/test_amd_llvm.py                    | 32 ++++++++++++++++++++++++
 tinygrad/runtime/support/compiler_amd.py | 15 +++++++++++
 3 files changed, 58 insertions(+)
 create mode 100644 test/test_amd_llvm.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index db2c0f58bc..f008092bcf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -549,6 +549,7 @@ jobs:
         deps: testing_minimal${{matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}
         opencl: ${{ matrix.backend == 'gpu' && 'true' }}
         amd: ${{ matrix.backend == 'amd' && 'true' }}
+        llvm: ${{ (matrix.backend == 'amd' || matrix.backend == 'llvm') && 'true' }}
         cuda: ${{ (matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv') && 'true' }}
     - name: Set env
       run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nPTX=1\nMOCKGPU=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nMOCKGPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
@@ -565,6 +566,9 @@ jobs:
     - name: Run pytest (amd)
       if: matrix.backend=='amd'
       run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/test_hcq.py test/external/external_test_am.py --durations=20
+    - name: Run pytest (amd with llvm backend)
+      if: matrix.backend=='amd'
+      run: python -m pytest -n=auto test/test_amd_llvm.py --durations=20
     - name: Run TRANSCENDENTAL math
       run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
     - name: Run process replay tests
@@ -588,6 +592,7 @@ jobs:
         python-version: '3.11'
         amd: 'true'
         cuda: 'true'
+        llvm: 'true'
     - name: Run real world test
       run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
     - name: Test models (Metal)
@@ -615,6 +620,12 @@ jobs:
         FORWARD_ONLY: 1
       run: |
         python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
+    - name: Run pytest (amd with llvm backend)
+      env:
+        MOCKGPU: 1
+        AMD: 1
+      run: |
+        python -m pytest -n=auto test/test_amd_llvm.py --durations=20
     - name: Run pytest (ptx)
       env:
         MOCKGPU: 1
diff --git a/test/test_amd_llvm.py b/test/test_amd_llvm.py
new file mode 100644
index 0000000000..08b70cc5f1
--- /dev/null
+++ b/test/test_amd_llvm.py
@@ -0,0 +1,32 @@
+import unittest
+import numpy as np
+from tinygrad import Device
+from tinygrad.helpers import flat_mv
+if Device.DEFAULT=="AMD":
+  from tinygrad.runtime.ops_amd import AMDAllocator, AMDDevice, AMDProgram
+  from tinygrad.runtime.support.compiler_amd import AMDLLVMCompiler
+
+@unittest.skipUnless(Device.DEFAULT == "AMD", "Runs only on AMD")
+class TestAMDLLVM(unittest.TestCase):
+  def test_compiler(self):
+    src = '''
+; https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AMDGPU/imm.ll
+define amdgpu_kernel void @i64_imm_inline_lo(ptr addrspace(1) %out) {
+entry:
+  store i64 1311768464867721221, ptr addrspace(1) %out ; 0x1234567800000005
+  ret void
+}
+    '''
+    device = AMDDevice()
+    compiler = AMDLLVMCompiler("gfx1100")
+    obj = compiler.compile(src)
+    allocator = AMDAllocator(device)
+    a = allocator.alloc(1*8)
+    prog = AMDProgram(device, "test", obj)
+    prog(a, wait=True)
+    na = np.empty(1, np.uint64)
+    allocator._copyout(flat_mv(na.data), a)
+    assert na == [0x1234567800000005]
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tinygrad/runtime/support/compiler_amd.py b/tinygrad/runtime/support/compiler_amd.py
index 2f527ce775..c5da1f8405 100644
--- a/tinygrad/runtime/support/compiler_amd.py
+++ b/tinygrad/runtime/support/compiler_amd.py
@@ -1,6 +1,7 @@
 import ctypes, subprocess
 import tinygrad.runtime.autogen.comgr as comgr
 from tinygrad.device import Compiler, CompileError
+from tinygrad.runtime.ops_llvm import LLVMCompiler
 
 def amdgpu_disassemble(lib:bytes):
   asm = subprocess.check_output(["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib)
@@ -68,3 +69,17 @@ class HIPCompiler(Compiler):
     try: return compile_hip(src, self.arch, src.split('\n', 1)[0].strip() == '.text')
     except RuntimeError as e: raise CompileError(e) from e
   def disassemble(self, lib:bytes): amdgpu_disassemble(lib)
+
+class AMDLLVMCompiler(LLVMCompiler):
+  jit = False
+  target_arch = "AMDGPU"
+  def __init__(self, arch: str):
+    self.arch = arch
+    super().__init__(self.arch, "+cumode")
+  def __reduce__(self): return (AMDLLVMCompiler, (self.arch,))
+  def compile(self, src:str) -> bytes:
+    try: return super().compile(src)
+    except RuntimeError as e:
+      if "undefined value '@llvm.amdgcn." in str(e): raise CompileError(str(e) + "AMD with LLVM backend requires LLVM >= 18") from e
+      raise CompileError(e) from e
+  def disassemble(self, lib:bytes): amdgpu_disassemble(lib)
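
Usage note (not part of the patch): a minimal sketch of how the new
AMDLLVMCompiler is exercised, mirroring test/test_amd_llvm.py. It assumes
an LLVM build with the AMDGPU target enabled (LLVM >= 18 per the error hint
above) and a ROCm install for the disassembly step, which shells out to
/opt/rocm/llvm/bin/llvm-objdump; the trivial @noop kernel is a hypothetical
stand-in for the imm.ll kernel the test compiles.

    # compile a hand-written AMDGPU LLVM IR kernel to a GPU object, no HIP/comgr involved
    from tinygrad.runtime.support.compiler_amd import AMDLLVMCompiler

    src = '''
    define amdgpu_kernel void @noop() {
    entry:
      ret void
    }
    '''
    compiler = AMDLLVMCompiler("gfx1100")  # same RDNA3 target the new CI test uses
    obj = compiler.compile(src)            # bytes of the compiled object; raises CompileError on failure
    compiler.disassemble(obj)              # dump the object back to AMDGPU assembly via llvm-objdump

As in the new CI steps, the full test (including execution through
AMDProgram) runs without real hardware:

    MOCKGPU=1 AMD=1 python -m pytest test/test_amd_llvm.py --durations=20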