mirror of https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
run on update_benchmark too (#1460)

* run on update_benchmark too
* amd inference test
* name it better
* add 10 CIFAR training steps
.github/workflows/benchmark.yml (24 changed lines)
@@ -4,18 +4,36 @@ on:
   push:
     branches:
     - master
+    - update_benchmark

 jobs:
   testmacbenchmark:
     name: Mac Benchmark
-    runs-on: self-hosted
+    runs-on: [self-hosted, macOS]

     steps:
     - name: Checkout Code
       uses: actions/checkout@v3
-    - name: Run metal ops test
+    - name: Run model inference benchmark
       run: python3 test/external/external_model_benchmark.py
+    - name: Run 10 CIFAR training steps
+      run: STEPS=10 python3 examples/hlb_cifar10.py
     - uses: actions/upload-artifact@v3
       with:
-        name: ONNX Inference Speed
+        name: ONNX Inference Speed (Mac)
         path: onnx_inference_speed.csv
+  testamdbenchmark:
+    name: AMD Benchmark
+    runs-on: [self-hosted, Linux]
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v3
+    - name: Run model inference benchmark
+      run: python3 test/external/external_model_benchmark.py
+    - name: Run 10 CIFAR training steps
+      run: STEPS=10 python3 examples/hlb_cifar10.py
+    - uses: actions/upload-artifact@v3
+      with:
+        name: ONNX Inference Speed (AMD)
+        path: onnx_inference_speed.csv
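Both jobs cap the CIFAR run with a STEPS environment override. How examples/hlb_cifar10.py consumes that variable is not shown in this diff; a minimal sketch of the pattern, assuming an integer STEPS value gates the training loop (names and the default are illustrative):

import os

# Illustrative only: cap the number of training steps via the STEPS env var,
# as in `STEPS=10 python3 examples/hlb_cifar10.py` from the workflow above.
STEPS = int(os.getenv("STEPS", "1000"))  # default is a placeholder, not from the diff

def train_step(i: int) -> None:
  # stand-in for one optimizer step on a CIFAR-10 batch
  print(f"step {i}")

for i in range(STEPS):
  train_step(i)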
11
test/external/external_model_benchmark.py
vendored
11
test/external/external_model_benchmark.py
vendored
@@ -3,9 +3,11 @@ import pathlib
 import time
 import onnx
 import torch
+torch.set_num_threads(1)
 from onnx2torch import convert
 from extra.utils import download_file
 from extra.onnx import get_run_onnx
+from tinygrad.helpers import OSX
 from tinygrad.tensor import Tensor
 from tinygrad.lazy import Device

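The newly imported OSX flag drives the platform switches in the hunks below. Its definition is not part of this diff; presumably it is a simple platform check along these lines (a sketch, not the verified tinygrad.helpers source):

import platform

# Assumed definition: True on macOS, where the Metal/MPS paths apply.
OSX = platform.system() == "Darwin"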
@@ -53,7 +55,7 @@ def benchmark_model(m):
   np_inputs = {k:torch.randn(shp).numpy() for k,shp in input_shapes.items()}
   assert len(input_shapes) < 20

-  for device in ["METAL", "CLANG"]:
+  for device in ["METAL" if OSX else "GPU", "CLANG"]:
     Device.DEFAULT = device
     inputs = {k:Tensor(inp) for k,inp in np_inputs.items()}
     tinygrad_model = get_run_onnx(onnx_model)
@@ -69,9 +71,10 @@ def benchmark_model(m):
   torch_inputs = [torch.tensor(x) for x in np_inputs.values()]
   benchmark(m, "torch_cpu", lambda: torch_model(*torch_inputs))

-  torch_mps_model = torch_model.to('mps')
-  torch_mps_inputs = [x.to('mps') for x in torch_inputs]
-  benchmark(m, "torch_mps", lambda: torch_mps_model(*torch_mps_inputs))
+  torch_device = "mps" if OSX else "cuda"
+  torch_mps_model = torch_model.to(torch_device)
+  torch_mps_inputs = [x.to(torch_device) for x in torch_inputs]
+  benchmark(m, f"torch_{torch_device}", lambda: torch_mps_model(*torch_mps_inputs))

   if open_csv is None:
     open_csv = csv.DictWriter(open('onnx_inference_speed.csv', 'w', newline=''), fieldnames=list(CSV.keys()))
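The benchmark helper called above is also not part of this diff; presumably it times a callable over a few runs, keeps the fastest result, and stores it in the CSV row that open_csv later writes out. A minimal sketch under that assumption (the shared CSV dict, run count, and printing are guesses):

import time

CSV = {}  # row under construction for the current model (assumed shared dict)

def benchmark(mnm: str, nm: str, fxn) -> float:
  # Run fxn a few times, record the fastest wall-clock time in milliseconds.
  tms = []
  for _ in range(3):
    st = time.perf_counter()
    fxn()
    tms.append((time.perf_counter() - st) * 1000)
  best = min(tms)
  CSV[nm] = best
  print(f"{best:8.2f} ms  {nm:16s} {mnm}")
  return best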