mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
Cleanup llvm cleanup (and some clang things too) (#8871)
* Cleanup llvm cleanup (and some clang things too)
* Tests
* Tests 2
* forgot mockgpu
* more print some sources
This commit is contained in:
54
.github/workflows/test.yml
vendored
54
.github/workflows/test.yml
vendored
@@ -611,10 +611,8 @@ jobs:
|
||||
osxtests:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
backend: [amd]
|
||||
|
||||
name: Tests on MacOS (${{ matrix.backend }})
|
||||
name: Tests on MacOS
|
||||
runs-on: macos-15
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
@@ -631,30 +629,62 @@ jobs:
|
||||
with:
|
||||
path: /Users/runner/Library/Python/3.12/lib/python/site-packages
|
||||
key: mockgpu-osx-${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
|
||||
- name: Set env
|
||||
run: printf "${{ matrix.backend == 'amd' && 'MOCKGPU=1\nAMD=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
|
||||
- name: Install llvm
|
||||
run: |
|
||||
brew install llvm
|
||||
- name: Install comgr
|
||||
if: matrix.backend == 'amd'
|
||||
run: |
|
||||
sudo mkdir -p /usr/local/lib
|
||||
curl -s -H "Authorization: token $GH_TOKEN" curl -s https://api.github.com/repos/nimlgen/amdcomgr_dylib/releases/latest | \
|
||||
jq -r '.assets[] | select(.name == "libamd_comgr.dylib").browser_download_url' | \
|
||||
sudo xargs curl -L -o /usr/local/lib/libamd_comgr.dylib
|
||||
- name: Install remu
|
||||
if: matrix.backend == 'amd'
|
||||
run: |
|
||||
curl -s -H "Authorization: token $GH_TOKEN" curl -s https://api.github.com/repos/Qazalin/remu/releases/latest | \
|
||||
jq -r '.assets[] | select(.name == "libremu.dylib").browser_download_url' | \
|
||||
sudo xargs curl -L -o /usr/local/lib/libremu.dylib
|
||||
- name: Install dependencies
|
||||
run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
- name: Check Device.DEFAULT and print some source
|
||||
- name: Check Device.DEFAULT and print some source (AMD)
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
MOCKGPU: 1
|
||||
AMD: 1
|
||||
FORWARD_ONLY: 1
|
||||
run: |
|
||||
PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
|
||||
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
|
||||
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
|
||||
DEBUG=5 python3 test/test_ops.py TestOps.test_add
|
||||
- name: Check Device.DEFAULT and print some source (LLVM)
|
||||
env:
|
||||
LLVM: 1
|
||||
run: |
|
||||
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'LLVM', Device.DEFAULT"
|
||||
DEBUG=5 python3 test/test_ops.py TestOps.test_add
|
||||
- name: Check Device.DEFAULT and print some source (CLANG)
|
||||
env:
|
||||
CLANG: 1
|
||||
run: |
|
||||
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'CLANG', Device.DEFAULT"
|
||||
DEBUG=5 python3 test/test_ops.py TestOps.test_add
|
||||
- name: Run pytest (amd)
|
||||
if: matrix.backend=='amd'
|
||||
run: python -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
|
||||
env:
|
||||
MOCKGPU: 1
|
||||
AMD: 1
|
||||
FORWARD_ONLY: 1
|
||||
run: |
|
||||
python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
|
||||
- name: Run pytest (llvm)
|
||||
env:
|
||||
LLVM: 1
|
||||
run: |
|
||||
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
|
||||
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
|
||||
- name: Run pytest (clang)
|
||||
env:
|
||||
CLANG: 1
|
||||
run: |
|
||||
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
|
||||
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
|
||||
|
||||
wintests:
|
||||
strategy:
|
||||
|
||||
@@ -278,6 +278,7 @@ def capstone_flatdump(lib: bytes):
|
||||
case machine: raise NotImplementedError(f"Capstone disassembly isn't supported for {machine}")
|
||||
for instr in cs.disasm(lib, 0):
|
||||
print(f"{instr.address:#08x}: {instr.mnemonic}\t{instr.op_str}")
|
||||
sys.stdout.flush()
|
||||
|
||||
# *** ctypes helpers
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ class ClangJITCompiler(Compiler):
|
||||
# x18 is a reserved platform register: it is clobbered on context switch on macOS and is used to store the TEB pointer on Windows on ARM, so don't use it
|
||||
target = 'x86_64' if sys.platform == 'win32' else platform.machine()
|
||||
args = ['-march=native', f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib']
|
||||
arch_args = ['-ffixed-x18'] if platform.machine() == 'arm64' else []
|
||||
arch_args = ['-ffixed-x18'] if target == 'arm64' else []
|
||||
obj = subprocess.check_output(['clang', '-c', '-x', 'c', *args, *arch_args, '-', '-o', '-'], input=src.encode('utf-8'))
|
||||
return jit_loader(obj)
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import ctypes, platform, sys
|
||||
from tinygrad.device import Compiled, Compiler, MallocAllocator, CPUProgram
|
||||
from tinygrad.helpers import getenv, capstone_flatdump
|
||||
from tinygrad.helpers import OSX, getenv, capstone_flatdump
|
||||
from tinygrad.renderer.llvmir import LLVMRenderer
|
||||
import tinygrad.runtime.autogen.llvm as llvm
|
||||
from tinygrad.runtime.support.elf import jit_loader
|
||||
@@ -14,11 +14,12 @@ def expect(x, err, ret=None):
|
||||
class LLVMCompiler(Compiler):
|
||||
def __init__(self, host_arch:str, opt:bool):
|
||||
for component in ['Target', 'TargetInfo', 'TargetMC', 'AsmPrinter']: getattr(llvm, f'LLVMInitialize{host_arch}{component}')()
|
||||
triple = ({'AArch64': 'aarch64', 'X86': 'x86_64'}[host_arch]+'-none-unknown-elf').encode()
|
||||
|
||||
triple = {'AArch64': b'aarch64', 'X86': b'x86_64'}[host_arch] + b'-none-unknown-elf'
|
||||
target = expect(llvm.LLVMGetTargetFromTriple(triple, ctypes.pointer(tgt:=llvm.LLVMTargetRef()), err:=cerr()), err, tgt)
|
||||
target_machine = llvm.LLVMCreateTargetMachine(target, triple, b'', b'+reserve-x18' if platform.machine() == 'arm64' else b'',
|
||||
llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)
|
||||
# +reserve-x18 here does the same thing as -ffixed-x18 in ops_clang.py; see the comments there for why it's needed on ARM macOS
|
||||
self.target_machine = llvm.LLVMCreateTargetMachine(target, triple, b'', b'+reserve-x18' if OSX and host_arch == 'AArch64' else b'',
|
||||
llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)
|
||||
|
||||
self.pbo = llvm.LLVMCreatePassBuilderOptions()
|
||||
if opt:
|
||||
@@ -29,7 +30,7 @@ class LLVMCompiler(Compiler):
|
||||
llvm.LLVMPassBuilderOptionsSetVerifyEach(self.pbo, True)
|
||||
else:
|
||||
self.passes = b'default<O0>'
|
||||
self.target_machine, self.opt = target_machine, opt
|
||||
|
||||
super().__init__(f"compile_llvm_jit{'_opt' if opt else ''}")
|
||||
|
||||
def __del__(self):
|
||||
|
||||
Reference in New Issue
Block a user