Less messy broken graph on paravirtualized metal workaround (#10182)

* Less messy broken graph on paravirtualized metal workaround GitHub CI macOS runners use paravirtualized metal, which is broken with graph (some comments say that ICB in particular is broken, but in my testing it was sometimes fine and at other times hit an assert inside metal's code related to resources, so I'm not sure). > Assertion failed: (resource != nil), function -[IOGPUMetalResource initWithResource:], file IOGPUMetalResource.m, line 458. This can be reproduced locally with any virtualization software (like utm) that can create macOS VMs with apple's own virtualization framework. * unused import
2026-01-09 15:08:02 -05:00 · 2025-05-06 22:41:02 +05:00
parent 59c03e8904
commit dba073e5c0
5 changed files with 9 additions and 10 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -706,11 +706,11 @@ jobs:
        cuda: 'true'
        llvm: 'true'
    - name: Run real world test
-      run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
+      run: METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
    - name: Test models (Metal)
-      run: JIT=2 METAL=1 python -m pytest test/models -v --durations=20
+      run: METAL=1 python -m pytest test/models -v --durations=20
    - name: Run ONNX
-      run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
+      run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test tensor core ops (fake)
      run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
    - name: Test tensor core ops (real)
@@ -814,7 +814,7 @@ jobs:
          deps: testing_minimal
          llvm: ${{ matrix.backend == 'llvm' && 'true' }}
      - name: Set env
-        run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1\nJIT=2'}}" >> $GITHUB_ENV
+        run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1'}}" >> $GITHUB_ENV
      - name: Check Device.DEFAULT and print some source
        run: |
          python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
--- a/test/test_graph.py
+++ b/test/test_graph.py
@@ -75,7 +75,6 @@ def helper_test_graphs(graph_impl, graphs, runs=RUN_CNT):
    for i in range(len(ground_thruth_bufs)): np.testing.assert_equal(ground_truth_np[i], test_bufs_np[i])

@unittest.skipUnless(Device[Device.DEFAULT].graph is not None, "graph support required")
-@unittest.skipIf(CI and Device.DEFAULT=="METAL", "no ICB in CI, creation of graph fails")
 class TestGraph(unittest.TestCase):
  def test_order_2_writes_to_same_buf(self):
    d0 = Device.DEFAULT
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -7,7 +7,7 @@ from test.helpers import assert_jit_cache_len, not_support_multi_device
 from tinygrad.tensor import Tensor
 from tinygrad.engine.jit import TinyJit
 from tinygrad.device import Device
-from tinygrad.helpers import CI, Context, JIT, GlobalCounters
+from tinygrad.helpers import Context, JIT, GlobalCounters
 from tinygrad.dtype import dtypes
 from extra.models.unet import ResBlock

@@ -394,7 +394,6 @@ class TestJit(unittest.TestCase):
    np.testing.assert_allclose(result_2.numpy(), [6], atol=1e-4, rtol=1e-5)
    np.testing.assert_allclose(result_3.numpy(), [6], atol=1e-4, rtol=1e-5)

-  @unittest.skipIf(CI and Device.DEFAULT=="METAL", "no ICB in CI, creation of graph fails")
  def test_jit_batch_split(self):
    if Device[Device.DEFAULT].graph is None or JIT >= 2: raise unittest.SkipTest("only test graphs")

--- a/test/test_multitensor.py
+++ b/test/test_multitensor.py
@@ -444,7 +444,6 @@ class TestMultiTensor(unittest.TestCase):
      np.testing.assert_allclose(r.numpy(), np.ones(256)+np.ones(256), atol=1e-4, rtol=1e-5)
    assert len(jf.jit_cache) > 0

-  #@unittest.skipIf(CI and Device.DEFAULT=="METAL", "no ICB in CI, creation of graph fails")
  @unittest.skip("test broken")
  def test_multi_device_jit_graph(self):
    if Device[d0].graph is None or Device[d1].graph is None: raise unittest.SkipTest("only test graphs")
--- a/tinygrad/runtime/ops_metal.py
+++ b/tinygrad/runtime/ops_metal.py
@@ -1,6 +1,6 @@
 import os, pathlib, struct, ctypes, tempfile, functools, contextlib, decimal, platform
 from typing import Any, Union, cast
-from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE
+from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE, CI
 from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, cpu_profile, ProfileDeviceEvent, ProfileRangeEvent
 from tinygrad.renderer.cstyle import MetalRenderer

@@ -73,8 +73,10 @@ class MetalDevice(Compiled):
    Compiled.profile_events += [ProfileDeviceEvent(device)]

    from tinygrad.runtime.graph.metal import MetalGraph
+    # NOTE: GitHub CI macOS runners use paravirtualized metal which is broken with graph.
+    # This can be reproduced locally with any virtualization software (like utm) that can create macOS VMs with apple's own virtualization framework.
    super().__init__(device, MetalAllocator(self), MetalRenderer(), MetalCompiler() if getenv("METAL_DIRECT", 1) else Compiler(),
-                     functools.partial(MetalProgram, self), MetalGraph)
+                     functools.partial(MetalProgram, self), MetalGraph if not CI else None)

  def synchronize(self):
    for cbuf in self.mtl_buffers_in_flight: