From 727de5ba1ee45eb1d17120818c6ece14a23fd01f Mon Sep 17 00:00:00 2001
From: chenyu <chenyu@fastmail.com>
Date: Wed, 20 Mar 2024 12:48:22 -0400
Subject: [PATCH] llama 7B on 3090 benchmark (#3837)

* llama 7B on 3090 benchmark

* symlink llama
---
 .github/workflows/benchmark.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index ff14263b2b..01fbf58a2c 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -90,12 +90,22 @@ jobs:
       uses: actions/checkout@v4
     - name: Print nvidia-smi
       run: nvidia-smi
+    - name: Symlink models and datasets  # links ~/tinygrad/weights/LLaMA into the workspace as weights/LLaMA; NOTE(review): plain `ln -s` is not idempotent if the link already exists - confirm the workspace is cleaned between runs
+      run: |
+        mkdir -p weights
+        ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
     - name: Run model inference benchmark
       run: CUDA=1 python3 test/external/external_model_benchmark.py
     - name: Test speed vs torch
       run: CUDA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
     - name: Run Tensor Core GEMM
       run: CUDA=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
+    - name: Run LLaMA  # runs examples/llama.py twice, JIT=0 then JIT=1, teeing output to llama_unjitted.txt / llama_jitted.txt; NOTE(review): `| tee` reports tee's exit status unless the step shell sets pipefail - confirm the workflow's default shell
+      run: |
+        CUDA=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
+        CUDA=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
+    - name: Run LLaMA with BEAM  # same llama.py invocation with JIT=1 BEAM=2 CACHELEVEL=0, teeing output to llama_beam.txt
+      run: CUDA=1 JIT=1 BEAM=2 CACHELEVEL=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt
     - name: Run GPT2
       run: |
         CUDA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
@@ -113,6 +123,9 @@ jobs:
           onnx_inference_speed.csv
           torch_speed.txt
           matmul.txt
+          llama_unjitted.txt
+          llama_jitted.txt
+          llama_beam.txt
           gpt2_unjitted.txt
           gpt2_jitted.txt
           gpt2_half.txt