mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 07:28:15 -05:00
KFD GEMM (#4221)
Added the KFD GEMM run to the benchmark CI and fixed the duplicated output filename shared by the CUDA and PTX GEMM steps (PTX now writes to matmul_ptx.txt).
This commit is contained in:
12
.github/workflows/benchmark.yml
vendored
12
.github/workflows/benchmark.yml
vendored
@@ -110,12 +110,12 @@ jobs:
|
||||
run: CUDA=1 python3 test/external/external_model_benchmark.py
|
||||
- name: Test speed vs torch
|
||||
run: CUDA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Run Tensor Core GEMM(CUDA)
|
||||
- name: Run Tensor Core GEMM (CUDA)
|
||||
run: |
|
||||
CUDA=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
|
||||
CUDA=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_bfloat16.txt
|
||||
- name: Run Tensor Core GEMM(PTX)
|
||||
run: CUDA=1 PTX=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
|
||||
- name: Run Tensor Core GEMM (PTX)
|
||||
run: CUDA=1 PTX=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_ptx.txt
|
||||
- name: Run LLaMA
|
||||
run: |
|
||||
CUDA=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
|
||||
@@ -148,6 +148,7 @@ jobs:
|
||||
torch_speed.txt
|
||||
matmul.txt
|
||||
matmul_bfloat16.txt
|
||||
matmul_ptx.txt
|
||||
llama_unjitted.txt
|
||||
llama_jitted.txt
|
||||
llama_beam.txt
|
||||
@@ -192,8 +193,10 @@ jobs:
|
||||
# run: |
|
||||
# python3 -c "import torch; print(torch.__version__)"
|
||||
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Run Tensor Core GEMM
|
||||
- name: Run Tensor Core GEMM (HSA)
|
||||
run: HSA=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
|
||||
- name: Run Tensor Core GEMM (KFD)
|
||||
run: KFD=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_kfd.txt
|
||||
- name: Run Stable Diffusion
|
||||
run: HSA=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
|
||||
- name: Run LLaMA 7B
|
||||
@@ -229,6 +232,7 @@ jobs:
|
||||
gpt2_unjitted.txt
|
||||
gpt2_jitted.txt
|
||||
matmul.txt
|
||||
matmul_kfd.txt
|
||||
sd.txt
|
||||
mixtral.txt
|
||||
|
||||
|
||||
Reference in New Issue
Block a user