Cleanup llvm cleanup (and some clang things too) (#8871)

* Cleanup llvm cleanup (and some clang things too)
* Tests
* Tests 2
* forgot mockgpu
* more print some sources
2026-01-09 15:08:02 -05:00 · 2025-02-05 01:49:05 +02:00
parent bb5ded85cc
commit a51c688f39
4 changed files with 50 additions and 18 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -611,10 +611,8 @@ jobs:
  osxtests:
    strategy:
      fail-fast: false
-      matrix:
-        backend: [amd]

-    name: Tests on MacOS (${{ matrix.backend }})
+    name: Tests on MacOS
    runs-on: macos-15
    timeout-minutes: 45
    steps:
@@ -631,30 +629,62 @@ jobs:
        with:
          path: /Users/runner/Library/Python/3.12/lib/python/site-packages
          key: mockgpu-osx-${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
-      - name: Set env
-        run: printf "${{ matrix.backend == 'amd' && 'MOCKGPU=1\nAMD=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
+      - name: Install llvm
+        run: |
+          brew install llvm
      - name: Install comgr
-        if: matrix.backend == 'amd'
        run: |
          sudo mkdir -p /usr/local/lib
          curl -s -H "Authorization: token $GH_TOKEN" curl -s https://api.github.com/repos/nimlgen/amdcomgr_dylib/releases/latest | \
            jq -r '.assets[] | select(.name == "libamd_comgr.dylib").browser_download_url' | \
            sudo xargs curl -L -o /usr/local/lib/libamd_comgr.dylib
      - name: Install remu
-        if: matrix.backend == 'amd'
        run: |
          curl -s -H "Authorization: token $GH_TOKEN" curl -s https://api.github.com/repos/Qazalin/remu/releases/latest | \
            jq -r '.assets[] | select(.name == "libremu.dylib").browser_download_url' | \
            sudo xargs curl -L -o /usr/local/lib/libremu.dylib
      - name: Install dependencies
        run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
-      - name: Check Device.DEFAULT and print some source
+      - name: Check Device.DEFAULT and print some source (AMD)
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+          MOCKGPU: 1
+          AMD: 1
+          FORWARD_ONLY: 1
        run: |
-          PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
-          DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
+          python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
+          DEBUG=5 python3 test/test_ops.py TestOps.test_add
+      - name: Check Device.DEFAULT and print some source (LLVM)
+        env:
+          LLVM: 1
+        run: |
+          python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'LLVM', Device.DEFAULT"
+          DEBUG=5 python3 test/test_ops.py TestOps.test_add
+      - name: Check Device.DEFAULT and print some source (CLANG)
+        env:
+          CLANG: 1
+        run: |
+          python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'CLANG', Device.DEFAULT"
+          DEBUG=5 python3 test/test_ops.py TestOps.test_add
      - name: Run pytest (amd)
-        if: matrix.backend=='amd'
-        run: python -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
+        env:
+          MOCKGPU: 1
+          AMD: 1
+          FORWARD_ONLY: 1
+        run: |
+          python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
+      - name: Run pytest (llvm)
+        env:
+          LLVM: 1
+        run: |
+          python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
+          ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
+      - name: Run pytest (clang)
+        env:
+          CLANG: 1
+        run: |
+          python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
+          ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'

  wintests:
    strategy:
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -278,6 +278,7 @@ def capstone_flatdump(lib: bytes):
    case machine: raise NotImplementedError(f"Capstone disassembly isn't supported for {machine}")
  for instr in cs.disasm(lib, 0):
    print(f"{instr.address:#08x}: {instr.mnemonic}\t{instr.op_str}")
+  sys.stdout.flush()

 # *** ctypes helpers

--- a/tinygrad/runtime/ops_clang.py
+++ b/tinygrad/runtime/ops_clang.py
@@ -12,7 +12,7 @@ class ClangJITCompiler(Compiler):
    # x18 is a reserved platform register. It is clobbered on context switch in macos and is used to store TEB pointer in windows on arm, don't use it
    target = 'x86_64' if sys.platform == 'win32' else platform.machine()
    args = ['-march=native', f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib']
-    arch_args = ['-ffixed-x18'] if platform.machine() == 'arm64' else []
+    arch_args = ['-ffixed-x18'] if target == 'arm64' else []
    obj = subprocess.check_output(['clang', '-c', '-x', 'c', *args, *arch_args, '-', '-o', '-'], input=src.encode('utf-8'))
    return jit_loader(obj)

--- a/tinygrad/runtime/ops_llvm.py
+++ b/tinygrad/runtime/ops_llvm.py
@@ -1,6 +1,6 @@
 import ctypes, platform, sys
 from tinygrad.device import Compiled, Compiler, MallocAllocator, CPUProgram
-from tinygrad.helpers import getenv, capstone_flatdump
+from tinygrad.helpers import OSX, getenv, capstone_flatdump
 from tinygrad.renderer.llvmir import LLVMRenderer
 import tinygrad.runtime.autogen.llvm as llvm
 from tinygrad.runtime.support.elf import jit_loader
@@ -14,11 +14,12 @@ def expect(x, err, ret=None):
 class LLVMCompiler(Compiler):
  def __init__(self, host_arch:str, opt:bool):
    for component in ['Target', 'TargetInfo', 'TargetMC', 'AsmPrinter']: getattr(llvm, f'LLVMInitialize{host_arch}{component}')()
-    triple = ({'AArch64': 'aarch64', 'X86': 'x86_64'}[host_arch]+'-none-unknown-elf').encode()

+    triple = {'AArch64': b'aarch64', 'X86': b'x86_64'}[host_arch] + b'-none-unknown-elf'
    target = expect(llvm.LLVMGetTargetFromTriple(triple, ctypes.pointer(tgt:=llvm.LLVMTargetRef()), err:=cerr()), err, tgt)
-    target_machine = llvm.LLVMCreateTargetMachine(target, triple, b'', b'+reserve-x18' if platform.machine() == 'arm64' else b'',
-                                                  llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)
+    # +reserve-x18 here does the same thing as -ffixed-x18 in ops_clang.py, see comments there for why it's needed on arm osx
+    self.target_machine = llvm.LLVMCreateTargetMachine(target, triple, b'', b'+reserve-x18' if OSX and host_arch == 'AArch64' else b'',
+                                                       llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)

    self.pbo = llvm.LLVMCreatePassBuilderOptions()
    if opt:
@@ -29,7 +30,7 @@ class LLVMCompiler(Compiler):
      llvm.LLVMPassBuilderOptionsSetVerifyEach(self.pbo, True)
    else:
      self.passes = b'default<O0>'
-    self.target_machine, self.opt = target_machine, opt
+
    super().__init__(f"compile_llvm_jit{'_opt' if opt else ''}")

  def __del__(self):