mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
second try at block linearize (#7892)
* second try at block linearize
* weeee, works for lil matmul
* it's so beautiful
* test tiny passes
* fix bugs
* combine matching BLOCKENDS
* wrapping
* test lin failures passes
* those failures were fake
* flip sort order
* fix ptx tests
* deal with store better
* dumb ptx fix
* expect less
* reduce lines
* reduce lines
* less lines and cleaner
* no defaultdict
* tighter
* simpler block_parent_count
This commit is contained in:
2
test/external/speed_v_theoretical.py
vendored
2
test/external/speed_v_theoretical.py
vendored
@@ -88,7 +88,7 @@ class TestKernelSpeed(unittest.TestCase):
 # def test_gemm_1024(self): self._test_matmul(1024, nv_tflops=8, amd_tflops=7)
 # def test_gemm_2048(self): self._test_matmul(2048, nv_tflops=50, amd_tflops=30)
 def test_gemm_4096(self): self._test_matmul(4096, nv_tflops=95, amd_tflops=70)
-def test_gemm_8192(self): self._test_matmul(8192, nv_tflops=130, amd_tflops=70)
+def test_gemm_8192(self): self._test_matmul(8192, nv_tflops=125, amd_tflops=70)

 def test_gemv_16384_4096(self): self._test_matmul(16384, 4096, 1, nv_gbs=430, amd_gbs=400)
 def test_gemv_4096_16384(self): self._test_matmul(4096, 16384, 1, nv_gbs=430, amd_gbs=380) # AMD was flaky at 400
Reference in New Issue
Block a user