mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
use randn in speed_v_theoretical instead of rand (#7656)
* use randn in speed_v_theoretical instead of rand
  this made green gemv 20% faster... but why?
* update threshold
This commit is contained in:
11
test/external/speed_v_theoretical.py
vendored
11
test/external/speed_v_theoretical.py
vendored
@@ -14,8 +14,9 @@ class TestKernelSpeed(unittest.TestCase):
|
||||
with Context(BEAM=3):
|
||||
for _ in range(10):
|
||||
with Context(BEAM=0, DEBUG=0):
|
||||
a = Tensor.rand(M, K, dtype="half").realize()
|
||||
b = Tensor.rand(K, N, dtype="half").realize()
|
||||
# TODO: randn is 20% faster than rand for gemv
|
||||
a = Tensor.randn(M, K, dtype="half").realize()
|
||||
b = Tensor.randn(K, N, dtype="half").realize()
|
||||
Device.default.synchronize()
|
||||
st = time.perf_counter()
|
||||
c = f(a, b)
|
||||
@@ -52,11 +53,11 @@ class TestKernelSpeed(unittest.TestCase):
|
||||
# TODO: smaller ones has other overhead in synchronize
|
||||
# def test_gemm_1024(self): self._test_matmul(1024, nv_tflops=8, amd_tflops=7)
|
||||
# def test_gemm_2048(self): self._test_matmul(2048, nv_tflops=50, amd_tflops=30)
|
||||
def test_gemm_4096(self): self._test_matmul(4096, nv_tflops=95, amd_tflops=65)
|
||||
def test_gemm_4096(self): self._test_matmul(4096, nv_tflops=100, amd_tflops=70)
|
||||
def test_gemm_8192(self): self._test_matmul(8192, nv_tflops=130, amd_tflops=70)
|
||||
|
||||
def test_gemv_4096_16384(self): self._test_matmul(4096, 16384, 1, nv_gbs=350, amd_gbs=270)
|
||||
def test_gemv_16384_4096(self): self._test_matmul(16384, 4096, 1, nv_gbs=320, amd_gbs=270)
|
||||
def test_gemv_16384_4096(self): self._test_matmul(16384, 4096, 1, nv_gbs=430, amd_gbs=400)
|
||||
def test_gemv_4096_16384(self): self._test_matmul(4096, 16384, 1, nv_gbs=430, amd_gbs=400)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user