mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
Less messy broken graph on paravirtualized metal workaround (#10182)
* Less messy broken graph on paravirtualized metal workaround. GitHub CI macOS runners use paravirtualized Metal, which is broken with graph (some comments say that ICB in particular is broken, but in my testing it was sometimes fine and at other times hit an assert inside Metal's code related to resources, so I'm not sure). > Assertion failed: (resource != nil), function -[IOGPUMetalResource initWithResource:], file IOGPUMetalResource.m, line 458. This can be reproduced locally with any virtualization software (like UTM) that can create macOS VMs with Apple's own virtualization framework. * unused import
This commit is contained in:
8
.github/workflows/test.yml
vendored
8
.github/workflows/test.yml
vendored
@@ -706,11 +706,11 @@ jobs:
|
||||
cuda: 'true'
|
||||
llvm: 'true'
|
||||
- name: Run real world test
|
||||
run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
|
||||
run: METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
|
||||
- name: Test models (Metal)
|
||||
run: JIT=2 METAL=1 python -m pytest test/models -v --durations=20
|
||||
run: METAL=1 python -m pytest test/models -v --durations=20
|
||||
- name: Run ONNX
|
||||
run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
- name: Test tensor core ops (fake)
|
||||
run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
|
||||
- name: Test tensor core ops (real)
|
||||
@@ -814,7 +814,7 @@ jobs:
|
||||
deps: testing_minimal
|
||||
llvm: ${{ matrix.backend == 'llvm' && 'true' }}
|
||||
- name: Set env
|
||||
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1\nJIT=2'}}" >> $GITHUB_ENV
|
||||
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1'}}" >> $GITHUB_ENV
|
||||
- name: Check Device.DEFAULT and print some source
|
||||
run: |
|
||||
python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
|
||||
|
||||
@@ -75,7 +75,6 @@ def helper_test_graphs(graph_impl, graphs, runs=RUN_CNT):
|
||||
for i in range(len(ground_thruth_bufs)): np.testing.assert_equal(ground_truth_np[i], test_bufs_np[i])
|
||||
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].graph is not None, "graph support required")
|
||||
@unittest.skipIf(CI and Device.DEFAULT=="METAL", "no ICB in CI, creation of graph fails")
|
||||
class TestGraph(unittest.TestCase):
|
||||
def test_order_2_writes_to_same_buf(self):
|
||||
d0 = Device.DEFAULT
|
||||
|
||||
@@ -7,7 +7,7 @@ from test.helpers import assert_jit_cache_len, not_support_multi_device
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.helpers import CI, Context, JIT, GlobalCounters
|
||||
from tinygrad.helpers import Context, JIT, GlobalCounters
|
||||
from tinygrad.dtype import dtypes
|
||||
from extra.models.unet import ResBlock
|
||||
|
||||
@@ -394,7 +394,6 @@ class TestJit(unittest.TestCase):
|
||||
np.testing.assert_allclose(result_2.numpy(), [6], atol=1e-4, rtol=1e-5)
|
||||
np.testing.assert_allclose(result_3.numpy(), [6], atol=1e-4, rtol=1e-5)
|
||||
|
||||
@unittest.skipIf(CI and Device.DEFAULT=="METAL", "no ICB in CI, creation of graph fails")
|
||||
def test_jit_batch_split(self):
|
||||
if Device[Device.DEFAULT].graph is None or JIT >= 2: raise unittest.SkipTest("only test graphs")
|
||||
|
||||
|
||||
@@ -444,7 +444,6 @@ class TestMultiTensor(unittest.TestCase):
|
||||
np.testing.assert_allclose(r.numpy(), np.ones(256)+np.ones(256), atol=1e-4, rtol=1e-5)
|
||||
assert len(jf.jit_cache) > 0
|
||||
|
||||
#@unittest.skipIf(CI and Device.DEFAULT=="METAL", "no ICB in CI, creation of graph fails")
|
||||
@unittest.skip("test broken")
|
||||
def test_multi_device_jit_graph(self):
|
||||
if Device[d0].graph is None or Device[d1].graph is None: raise unittest.SkipTest("only test graphs")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os, pathlib, struct, ctypes, tempfile, functools, contextlib, decimal, platform
|
||||
from typing import Any, Union, cast
|
||||
from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE
|
||||
from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE, CI
|
||||
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, cpu_profile, ProfileDeviceEvent, ProfileRangeEvent
|
||||
from tinygrad.renderer.cstyle import MetalRenderer
|
||||
|
||||
@@ -73,8 +73,10 @@ class MetalDevice(Compiled):
|
||||
Compiled.profile_events += [ProfileDeviceEvent(device)]
|
||||
|
||||
from tinygrad.runtime.graph.metal import MetalGraph
|
||||
# NOTE: GitHub CI macOS runners use paravirtualized metal which is broken with graph.
|
||||
# This can be reproduced locally with any virtualization software (like utm) that can create macOS VMs with apple's own virtualization framework.
|
||||
super().__init__(device, MetalAllocator(self), MetalRenderer(), MetalCompiler() if getenv("METAL_DIRECT", 1) else Compiler(),
|
||||
functools.partial(MetalProgram, self), MetalGraph)
|
||||
functools.partial(MetalProgram, self), MetalGraph if not CI else None)
|
||||
|
||||
def synchronize(self):
|
||||
for cbuf in self.mtl_buffers_in_flight:
|
||||
|
||||
Reference in New Issue
Block a user