fix HALF=1 in test_speed_v_torch (#7642)

* fix HALF=1 in test_speed_v_torch

"operation cache defeats" adds 1 to all arg, which were centered around 0. adding 1 makes big matmul and matvec go inf.

fixed by subtract 1 after and bumpped tolerance for half input

* bigger tol for BIG=2, update CI too

* bigger tol
This commit is contained in:
chenyu
2024-11-11 14:29:37 -05:00
committed by GitHub
parent 4d81b7952a
commit bfab03288d
2 changed files with 5 additions and 3 deletions

View File

@@ -157,7 +157,7 @@ jobs:
- name: Run model inference benchmark
run: NV=1 RUN_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py
- name: Test speed vs torch
run: NV=1 RUN_PROCESS_REPLAY=0 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
run: NV=1 RUN_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
- name: Test tensor cores
run: |
NV=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded

View File

@@ -38,6 +38,7 @@ def helper_test_speed(f1, *args):
# operation cache defeats
args = [(x+1).realize() if isinstance(x, Tensor) else (None if x is None else (x+1)) for x in args]
args = [(x-1).realize() if isinstance(x, Tensor) else (None if x is None else (x-1)) for x in args]
# force syncing
[x.numpy() if isinstance(x, Tensor) or str(torch_device) == "cpu" else x.cpu().numpy() for x in args if x is not None]
@@ -91,8 +92,9 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args):
desc = "faster" if et_torch > et_tinygrad else "slower"
flops = save_ops*1e-6
mem = save_mem*1e-6
print(("\r" if not CI else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501
np.testing.assert_allclose(val_tinygrad, val_torch, atol=1e-3, rtol=1e-3)
print(("\r" if not CI else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501
atol, rtol = (1e-2, 1e-2) if torch_dt == torch.float16 else (1e-3, 1e-3)
np.testing.assert_allclose(val_tinygrad, val_torch, atol=atol, rtol=rtol)
def helper_test_conv(bs, in_chans, out_chans, kernel_size, img_size_y, img_size_x):
torch.manual_seed(0)