From 5b3d8a886ea0aa9d1eb684081cdc13e8d6ca7372 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Thu, 14 Mar 2024 14:12:32 -0700
Subject: [PATCH] split tinybox benchmark into two (#3741)

* split tinybox benchmark into two

* symlinks
---
 .github/workflows/benchmark.yml | 55 ++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 41604467fc..187823ebf5 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -131,8 +131,6 @@ jobs:
     steps:
     - name: Checkout Code
       uses: actions/checkout@v4
-    - name: Show off tinybox
-      run: /opt/rocm/bin/rocm-bandwidth-test
     - name: Symlink models and datasets
       run: |
         mkdir -p weights
@@ -143,6 +141,8 @@
         ln -s /raid/weights/LLaMA-2 weights/LLaMA-2
         mkdir -p extra/datasets
         ln -s /raid/datasets/imagenet extra/datasets/imagenet
+    - name: Show off tinybox
+      run: /opt/rocm/bin/rocm-bandwidth-test
     - name: Run model inference benchmark
       run: LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 NOCLANG=1 python3 test/external/external_model_benchmark.py
     - name: Test speed vs torch
@@ -167,6 +167,44 @@ jobs:
       run: |
         HSA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
         HSA=1 JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
+    - uses: actions/upload-artifact@v4
+      with:
+        name: Speed (AMD)
+        path: |
+          onnx_inference_speed.csv
+          torch_speed.txt
+          llama_unjitted.txt
+          llama_jitted.txt
+          llama_beam.txt
+          llama_2_70B.txt
+          gpt2_unjitted.txt
+          gpt2_jitted.txt
+          matmul.txt
+          sd.txt
+          mixtral.txt
+
+  testmoreamdbenchmark:
+    name: tinybox Training
+    runs-on: [self-hosted, Linux, tinybox]
+    defaults:
+      run:
+        shell: bash -o pipefail {0}
+    if: github.repository_owner == 'tinygrad'
+    env:
+      PYTHONPATH: .
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Symlink models and datasets
+      run: |
+        mkdir -p weights
+        ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz
+        ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
+        ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz
+        ln -s /raid/weights/mixtral-8x7b-32kseqlen weights/mixtral-8x7b-32kseqlen
+        ln -s /raid/weights/LLaMA-2 weights/LLaMA-2
+        mkdir -p extra/datasets
+        ln -s /raid/datasets/imagenet extra/datasets/imagenet
     - name: Run 10 CIFAR training steps
       run: HSA=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
     - name: Run 10 CIFAR training steps w HALF
@@ -183,23 +221,12 @@
       run: HIP=1 BENCHMARK=10 BS=624 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet.txt
     - uses: actions/upload-artifact@v4
       with:
-        name: Speed (AMD)
+        name: Speed (AMD Training)
         path: |
-          onnx_inference_speed.csv
-          torch_speed.txt
           train_cifar.txt
           train_cifar_half.txt
           train_cifar_wino.txt
           train_cifar_one_gpu.txt
           train_resnet.txt
           train_resnet_one_gpu.txt
           train_cifar_six_gpu.txt
-          llama_unjitted.txt
-          llama_jitted.txt
-          llama_beam.txt
-          llama_2_70B.txt
-          gpt2_unjitted.txt
-          gpt2_jitted.txt
-          matmul.txt
-          sd.txt
-          mixtral.txt