diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2ae7f5818c..f2de0e838a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -977,6 +977,7 @@ jobs:
       with:
         key: macos-${{ matrix.backend }}-minimal
         deps: testing_minimal
+        pydeps: "capstone"
         llvm: ${{ matrix.backend == 'llvm' && 'true' }}
     - name: Set env
       run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1'}}" >> $GITHUB_ENV
diff --git a/test/test_disassembly.py b/test/test_disassembly.py
new file mode 100644
index 0000000000..e908b83710
--- /dev/null
+++ b/test/test_disassembly.py
@@ -0,0 +1,24 @@
+import unittest, io
+from tinygrad import Tensor, dtypes
+from contextlib import redirect_stdout
+from tinygrad.device import Device
+from tinygrad.helpers import OSX
+from tinygrad.engine.realize import get_program
+
+class TestDisassembly(unittest.TestCase):
+  # checks on the disassembly of kernels compiled by the default device's compiler
+  # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
+  @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
+  def test_float16_alu(self):
+    # a float16 add should compile without any "fcvt" showing up in the disassembly
+    c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
+    s = c.schedule()[-1]
+    p = get_program(s.ast, Device[Device.DEFAULT].renderer)
+    lib = Device[Device.DEFAULT].compiler.compile(p.src)
+    # capture stdout while disassembling so the listing can be searched
+    out = io.StringIO()
+    with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
+    self.assertNotIn("fcvt", out.getvalue())
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/tinygrad/runtime/ops_cpu.py b/tinygrad/runtime/ops_cpu.py
index 2bcb1e5c64..5ebf1f8c31 100644
--- a/tinygrad/runtime/ops_cpu.py
+++ b/tinygrad/runtime/ops_cpu.py
@@ -18,7 +18,9 @@ class ClangJITCompiler(Compiler):
     # -fno-math-errno is required for __builtin_sqrt to become an instruction instead of a function call
     # x18 is a reserved platform register. It is clobbered on context switch in macos and is used to store TEB pointer in windows on arm, don't use it
     target = 'x86_64' if sys.platform == 'win32' else platform.machine()
-    args = ['-march=native', f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib', '-fno-ident']
+    # on arm march means "runs on this arch and superset" instead of "optimize for this arch". x86 march == arm mcpu
+    arch = '-march=native' if platform.machine() in ('x86_64', 'AMD64') else '-mcpu=native'
+    args = [arch, f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib', '-fno-ident']
     arch_args = ['-ffixed-x18'] if target == 'arm64' else []
     obj = subprocess.check_output([getenv("CC", 'clang'), '-c', '-x', 'c', *args, *arch_args, '-', '-o', '-'], input=src.encode('utf-8'))
     return jit_loader(obj)