wmma: add CUDA tensor core and fix test_speed_v_torch failure (#3544)

This commit is contained in:
Francis Lam
2024-03-01 17:51:02 -08:00
committed by GitHub
parent b3cdc11a58
commit e17f1821a7
8 changed files with 50 additions and 25 deletions

View File

@@ -85,6 +85,8 @@ jobs:
run: CUDA=1 python3 test/external/external_model_benchmark.py
- name: Test speed vs torch
run: CUDA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
- name: Run Tensor Core GEMM
run: CUDA=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
- name: Run GPT2
run: |
CUDA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
@@ -99,6 +101,7 @@ jobs:
path: |
onnx_inference_speed.csv
torch_speed.txt
matmul.txt
gpt2_unjitted.txt
gpt2_jitted.txt
gpt2_half_beam.txt