Cleanup llvm cleanup (and some clang things too) (#8871)

* Cleanup llvm cleanup (and some clang things too)

* Tests

* Tests 2

* forgot mockgpu

* more "print some source" steps
This commit is contained in:
uuuvn
2025-02-05 01:49:05 +02:00
committed by GitHub
parent bb5ded85cc
commit a51c688f39
4 changed files with 50 additions and 18 deletions

View File

@@ -611,10 +611,8 @@ jobs:
osxtests:
strategy:
fail-fast: false
matrix:
backend: [amd]
name: Tests on MacOS (${{ matrix.backend }})
name: Tests on MacOS
runs-on: macos-15
timeout-minutes: 45
steps:
@@ -631,30 +629,62 @@ jobs:
with:
path: /Users/runner/Library/Python/3.12/lib/python/site-packages
key: mockgpu-osx-${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
- name: Set env
run: printf "${{ matrix.backend == 'amd' && 'MOCKGPU=1\nAMD=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
- name: Install llvm
run: |
brew install llvm
- name: Install comgr
if: matrix.backend == 'amd'
run: |
# Ensure the install directory exists before writing the dylib into it.
sudo mkdir -p /usr/local/lib
# Query the latest nimlgen/amdcomgr_dylib release, extract the download URL of the
# libamd_comgr.dylib asset, and fetch it into /usr/local/lib.
curl -s -H "Authorization: token $GH_TOKEN" https://api.github.com/repos/nimlgen/amdcomgr_dylib/releases/latest | \
jq -r '.assets[] | select(.name == "libamd_comgr.dylib").browser_download_url' | \
sudo xargs curl -L -o /usr/local/lib/libamd_comgr.dylib
- name: Install remu
if: matrix.backend == 'amd'
run: |
# Query the latest Qazalin/remu release, extract the download URL of the
# libremu.dylib asset, and fetch it into /usr/local/lib.
curl -s -H "Authorization: token $GH_TOKEN" https://api.github.com/repos/Qazalin/remu/releases/latest | \
jq -r '.assets[] | select(.name == "libremu.dylib").browser_download_url' | \
sudo xargs curl -L -o /usr/local/lib/libremu.dylib
- name: Install dependencies
run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
- name: Check Device.DEFAULT and print some source
- name: Check Device.DEFAULT and print some source (AMD)
env:
PYTHONPATH: ${{ github.workspace }}
MOCKGPU: 1
AMD: 1
FORWARD_ONLY: 1
run: |
PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
DEBUG=5 python3 test/test_ops.py TestOps.test_add
- name: Check Device.DEFAULT and print some source (LLVM)
env:
LLVM: 1
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'LLVM', Device.DEFAULT"
DEBUG=5 python3 test/test_ops.py TestOps.test_add
- name: Check Device.DEFAULT and print some source (CLANG)
env:
CLANG: 1
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'CLANG', Device.DEFAULT"
DEBUG=5 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (amd)
if: matrix.backend=='amd'
run: python -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
env:
MOCKGPU: 1
AMD: 1
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
- name: Run pytest (llvm)
env:
LLVM: 1
run: |
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
- name: Run pytest (clang)
env:
CLANG: 1
run: |
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
wintests:
strategy:

View File

@@ -278,6 +278,7 @@ def capstone_flatdump(lib: bytes):
case machine: raise NotImplementedError(f"Capstone disassembly isn't supported for {machine}")
for instr in cs.disasm(lib, 0):
print(f"{instr.address:#08x}: {instr.mnemonic}\t{instr.op_str}")
sys.stdout.flush()
# *** ctypes helpers

View File

@@ -12,7 +12,7 @@ class ClangJITCompiler(Compiler):
# x18 is a reserved platform register: it is clobbered on context switch on macOS and is used to store the TEB pointer on Windows on ARM, so don't use it
target = 'x86_64' if sys.platform == 'win32' else platform.machine()
args = ['-march=native', f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib']
arch_args = ['-ffixed-x18'] if platform.machine() == 'arm64' else []
arch_args = ['-ffixed-x18'] if target == 'arm64' else []
obj = subprocess.check_output(['clang', '-c', '-x', 'c', *args, *arch_args, '-', '-o', '-'], input=src.encode('utf-8'))
return jit_loader(obj)

View File

@@ -1,6 +1,6 @@
import ctypes, platform, sys
from tinygrad.device import Compiled, Compiler, MallocAllocator, CPUProgram
from tinygrad.helpers import getenv, capstone_flatdump
from tinygrad.helpers import OSX, getenv, capstone_flatdump
from tinygrad.renderer.llvmir import LLVMRenderer
import tinygrad.runtime.autogen.llvm as llvm
from tinygrad.runtime.support.elf import jit_loader
@@ -14,11 +14,12 @@ def expect(x, err, ret=None):
class LLVMCompiler(Compiler):
def __init__(self, host_arch:str, opt:bool):
for component in ['Target', 'TargetInfo', 'TargetMC', 'AsmPrinter']: getattr(llvm, f'LLVMInitialize{host_arch}{component}')()
triple = ({'AArch64': 'aarch64', 'X86': 'x86_64'}[host_arch]+'-none-unknown-elf').encode()
triple = {'AArch64': b'aarch64', 'X86': b'x86_64'}[host_arch] + b'-none-unknown-elf'
target = expect(llvm.LLVMGetTargetFromTriple(triple, ctypes.pointer(tgt:=llvm.LLVMTargetRef()), err:=cerr()), err, tgt)
target_machine = llvm.LLVMCreateTargetMachine(target, triple, b'', b'+reserve-x18' if platform.machine() == 'arm64' else b'',
llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)
# +reserve-x18 here does the same thing as -ffixed-x18 in ops_clang.py, see comments there for why it's needed on arm osx
self.target_machine = llvm.LLVMCreateTargetMachine(target, triple, b'', b'+reserve-x18' if OSX and host_arch == 'AArch64' else b'',
llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)
self.pbo = llvm.LLVMCreatePassBuilderOptions()
if opt:
@@ -29,7 +30,7 @@ class LLVMCompiler(Compiler):
llvm.LLVMPassBuilderOptionsSetVerifyEach(self.pbo, True)
else:
self.passes = b'default<O0>'
self.target_machine, self.opt = target_machine, opt
super().__init__(f"compile_llvm_jit{'_opt' if opt else ''}")
def __del__(self):