diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 140b6913d4..05839f173b 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -52,6 +52,8 @@ jobs:
       run: JIT=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt
     - name: Run GPT2 w HALF/BEAM
       run: JIT=1 HALF=1 BEAM=2 CACHELEVEL=0 CAST_BEFORE_VIEW=0 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt
+    - name: Train MNIST  # pipefail: a failed TARGET_EVAL_ACC_PCT check in the script must fail the step despite the tee
+      run: set -o pipefail; time PYTHONPATH=. TARGET_EVAL_ACC_PCT=97.5 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt
     - name: Run 10 CIFAR training steps
       run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
     # TODO: this is flaky too
@@ -63,6 +65,7 @@ jobs:
         path: |
           onnx_inference_speed.csv
           torch_speed.txt
+          beautiful_mnist.txt
           train_cifar.txt
           train_cifar_wino.txt
           llama_unjitted.txt
@@ -114,6 +117,8 @@ jobs:
       run: CUDA=1 JIT=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt
     - name: Run GPT2 w HALF/BEAM
       run: CUDA=1 JIT=1 HALF=1 BEAM=2 CACHELEVEL=0 CAST_BEFORE_VIEW=0 JIT_BATCH_SIZE=4 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt
+    - name: Train MNIST  # pipefail: a failed TARGET_EVAL_ACC_PCT check in the script must fail the step despite the tee
+      run: set -o pipefail; time PYTHONPATH=. CUDA=1 TARGET_EVAL_ACC_PCT=97.5 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt
     - name: Run 10 CIFAR training steps
       run: CUDA=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
     - name: Run 10 CIFAR training steps w HALF
@@ -136,6 +141,7 @@ jobs:
           gpt2_jitted.txt
           gpt2_half.txt
           gpt2_half_beam.txt
+          beautiful_mnist.txt
           train_cifar.txt
           train_cifar_half.txt
           train_cifar_bf16.txt
@@ -233,6 +239,8 @@ jobs:
         ln -s /raid/weights/LLaMA-2 weights/LLaMA-2
         mkdir -p extra/datasets
         ln -s /raid/datasets/imagenet extra/datasets/imagenet
+    - name: Train MNIST  # pipefail: a failed TARGET_EVAL_ACC_PCT check in the script must fail the step despite the tee
+      run: set -o pipefail; time PYTHONPATH=. HSA=1 TARGET_EVAL_ACC_PCT=97.5 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt
     - name: Run 10 CIFAR training steps
       run: HSA=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
     - name: Run 10 CIFAR training steps w HALF
@@ -253,6 +261,7 @@ jobs:
       with:
         name: Speed (AMD Training)
         path: |
+          beautiful_mnist.txt
           train_cifar.txt
           train_cifar_half.txt
           train_cifar_bf16.txt
diff --git a/examples/beautiful_mnist.py b/examples/beautiful_mnist.py
index 4a2cbdb4e6..5a38f1f33a 100644
--- a/examples/beautiful_mnist.py
+++ b/examples/beautiful_mnist.py
@@ -1,6 +1,7 @@
 # model based off https://towardsdatascience.com/going-beyond-99-mnist-handwritten-digits-recognition-cfff96337392
 from typing import List, Callable
 from tinygrad import Tensor, TinyJit, nn, GlobalCounters
+from tinygrad.helpers import getenv, colored
 from extra.datasets import fetch_mnist
 from tqdm import trange
 
@@ -42,3 +43,8 @@ if __name__ == "__main__":
     loss = train_step()
     if i%10 == 9: test_acc = get_test_acc().item()
     t.set_description(f"loss: {loss.item():6.2f} test_accuracy: {test_acc:5.2f}%")
+
+  # verify eval acc: the check is skipped when TARGET_EVAL_ACC_PCT resolves to a falsy value (0.0 is the default passed to getenv)
+  if target := getenv("TARGET_EVAL_ACC_PCT", 0.0):
+    if test_acc >= target: print(colored(f"{test_acc=} >= {target}", "green"))
+    else: raise ValueError(colored(f"{test_acc=} < {target}", "red"))  # uncaught, so the process exits non-zero