Mesa: freedreno (#12746)
* ir3 init
* got a program
* 1 + 1 works
* use isa_disasm instead of shader_disasm
* wip
* matmul works
* works on py3.14
* fix const loading
* skip QCOM failing tests
* cleanup
* args actually work
* add compile-only tests
* fix typo and install tinymesa
* IR3 NULL backend
* (float32) images work
* autogen fix
* fix compile only test
* typo
* mypy happy
* compile-only uses py3.14
* bump mesa
* unify qcom disassembler
* float16 works
* disasm shows in viz
* save a line
* add real del
* variable workgroup sizes
* simplify diff
* bump line count
* properly set wgsz
* regen mesa
* no preamble
* bump lines
committed by GitHub
parent 947c6eefc3
commit 1c16b6e082
@@ -2,7 +2,7 @@ import time, math, unittest, functools, platform, warnings
 import numpy as np
 from typing import List, Callable
 import torch
-from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, Context, CPU_LLVM, CPU_LVP, AMD_LLVM
+from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, Context, CPU_LLVM, CPU_LVP, AMD_LLVM, EMULATE
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.tensor import _to_np_dtype
 from tinygrad.device import is_dtype_supported
@@ -16,6 +16,7 @@ if CI:
 
 FORWARD_ONLY = getenv("FORWARD_ONLY", 0)
 PRINT_TENSORS = getenv("PRINT_TENSORS", 0)
+COMPILE_ONLY = Device.DEFAULT == "NULL" and not EMULATE
 
 def slow_test(test_func):
   return unittest.skipIf(getenv("SKIP_SLOW_TEST"), "Skipping slow test")(test_func)
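The COMPILE_ONLY flag added above drives the rest of this change to the test harness: with the NULL backend selected and EMULATE unset, tinygrad still lowers and compiles every kernel, but nothing executes, so any check that reads values back with .numpy() has to be skipped. A minimal sketch of the pattern the rest of the diff applies (the test class, values, and the NULL=1 device selection are illustrative assumptions, not part of the commit):

```python
# Sketch of the compile-only guard pattern, assuming the NULL backend is
# selected via the environment (e.g. NULL=1). Test names and values here
# are illustrative, not taken from the commit.
import unittest
import numpy as np
from tinygrad import Tensor, Device
from tinygrad.helpers import EMULATE

COMPILE_ONLY = Device.DEFAULT == "NULL" and not EMULATE

class TestCompileOnlyPattern(unittest.TestCase):
  def test_add(self):
    out = (Tensor([1.0, 2.0]) + Tensor([3.0, 4.0])).realize()  # still compiles the kernel on NULL
    if not COMPILE_ONLY:  # reading results back needs a real runtime
      np.testing.assert_equal(out.numpy(), [4.0, 6.0])

  @unittest.skipIf(COMPILE_ONLY, "test requires runtime")
  def test_runtime_only(self):
    # tests whose whole point is a numeric result are skipped entirely in compile-only mode
    np.testing.assert_equal(Tensor.full((2, 2), 3.0).numpy(), np.full((2, 2), 3.0, dtype=np.float32))

if __name__ == "__main__":
  unittest.main()
```

Everything below follows this pattern: existing helper_test_op calls need no edits because compare() returns early (next hunk), direct np.testing checks move under an "if not COMPILE_ONLY:" guard, and tests that only make sense with a runtime get the skipIf decorator.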
@@ -38,6 +39,7 @@ def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, gra
   tinygrad_fp = time.monotonic() - st
 
   def compare(s, tinygrad_output, torch_output, atol, rtol):
+    if COMPILE_ONLY: return
     if PRINT_TENSORS: print(s, tinygrad_output, torch_output)
     try:
       assert tinygrad_output.shape == torch_output.shape, f"shape mismatch: tinygrad={tinygrad_output.shape} | torch={torch_output.shape}"
@@ -421,8 +423,9 @@ class TestOps(unittest.TestCase):
   def test_isinf(self):
     val = [float('-inf'), 0., float('inf'), float('nan'), 1.1]
     helper_test_op(None, torch.isinf, Tensor.isinf, vals=[val], forward_only=True)
-    np.testing.assert_equal(Tensor(val).isinf(detect_positive=True, detect_negative=False).numpy(), [False, False, True, False, False])
-    np.testing.assert_equal(Tensor(val).isinf(detect_positive=False, detect_negative=True).numpy(), [True, False, False, False, False])
+    if not COMPILE_ONLY:
+      np.testing.assert_equal(Tensor(val).isinf(detect_positive=True, detect_negative=False).numpy(), [False, False, True, False, False])
+      np.testing.assert_equal(Tensor(val).isinf(detect_positive=False, detect_negative=True).numpy(), [True, False, False, False, False])
 
   def test_isnan(self):
     helper_test_op(None, torch.isnan, Tensor.isnan, vals=[[float('-inf'), 0., float('inf'), float('nan'), 1.1]], forward_only=True)
@@ -594,7 +597,7 @@ class TestOps(unittest.TestCase):
     helper_test_op(None, lambda x: x//2, forward_only=True, vals=[[3, 4, 5]])
     helper_test_op(None, functools.partial(torch.div, rounding_mode="trunc"), Tensor.idiv, forward_only=True,
                    vals=[[-4, 7, 5, 4, -7, 8], [2, -3, 8, -2, 3, 5]])
-    if is_dtype_supported(dtypes.uint64):
+    if is_dtype_supported(dtypes.uint64) and not COMPILE_ONLY:
       x = Tensor(2**64 - 1, dtype=dtypes.uint64).idiv(1)
       np.testing.assert_equal(x.numpy(), 2**64 - 1)
 
@@ -679,6 +682,7 @@ class TestOps(unittest.TestCase):
     # float to power of int
     helper_test_op(None, lambda x: 0.7**x, vals=[[-2,-1,0,1,2,3]], forward_only=True)
 
+  @unittest.skipIf(COMPILE_ONLY, "test requires runtime")
   def test_pow_const_direct(self):
     # x ** c
     def get_tiny_gradient(x, c):
@@ -1088,8 +1092,9 @@ class TestOps(unittest.TestCase):
     # check if it returns the first index for multiple occurences
     helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[2, 2]])
     helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[1, 2, 2]])
-    np.testing.assert_equal(Tensor([2,2]).argmax().numpy(), 0)
-    np.testing.assert_equal(Tensor([1,2,2]).argmax().numpy(), 1)
+    if not COMPILE_ONLY:
+      np.testing.assert_equal(Tensor([2,2]).argmax().numpy(), 0)
+      np.testing.assert_equal(Tensor([1,2,2]).argmax().numpy(), 1)
     helper_test_op([(10,20)], lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True)
     helper_test_op([(10,20)], lambda x: x.argmax(0, False).type(torch.int32), lambda x: x.argmax(0, False), forward_only=True)
     helper_test_op([(10,20)], lambda x: x.argmax(1, False).type(torch.int32), lambda x: x.argmax(1, False), forward_only=True)
@@ -1107,8 +1112,9 @@ class TestOps(unittest.TestCase):
     # check if it returns the first index for multiple occurences
     helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[2, 2]])
     helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[3, 2, 2]])
-    np.testing.assert_equal(Tensor([2,2]).argmin().numpy(), 0)
-    np.testing.assert_equal(Tensor([3,2,2]).argmin().numpy(), 1)
+    if not COMPILE_ONLY:
+      np.testing.assert_equal(Tensor([2,2]).argmin().numpy(), 0)
+      np.testing.assert_equal(Tensor([3,2,2]).argmin().numpy(), 1)
     helper_test_op([(10,20)], lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True)
     helper_test_op([(10,20)], lambda x: x.argmin(0, False).type(torch.int32), lambda x: x.argmin(0, False), forward_only=True)
     helper_test_op([(10,20)], lambda x: x.argmin(1, False).type(torch.int32), lambda x: x.argmin(1, False), forward_only=True)
@@ -1156,12 +1162,13 @@ class TestOps(unittest.TestCase):
             lambda x: x.topk(4, dim, largest, sorted_).indices.type(torch.int32),
             lambda x: x.topk(4, dim, largest, sorted_)[1], forward_only=True)
     # repeated values
-    value, indices = Tensor([1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0]).topk(3)
-    np.testing.assert_equal(value.numpy(), [1, 1, 1])
-    np.testing.assert_equal(indices.numpy(), [0, 1, 3])
-    value, indices = Tensor([1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0]).topk(3, largest=False)
-    np.testing.assert_equal(value.numpy(), [0, 0, 0])
-    np.testing.assert_equal(indices.numpy(), [2, 4, 6])
+    if not COMPILE_ONLY:
+      value, indices = Tensor([1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0]).topk(3)
+      np.testing.assert_equal(value.numpy(), [1, 1, 1])
+      np.testing.assert_equal(indices.numpy(), [0, 1, 3])
+      value, indices = Tensor([1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0]).topk(3, largest=False)
+      np.testing.assert_equal(value.numpy(), [0, 0, 0])
+      np.testing.assert_equal(indices.numpy(), [2, 4, 6])
     self.helper_test_exception([(4)], lambda x: x.topk(5), expected=(RuntimeError, ValueError))
 
   @slow_test
@@ -1313,6 +1320,7 @@ class TestOps(unittest.TestCase):
     helper_test_op(None, lambda x,y: x.matmul(y), lambda x,y: x@y, vals=[np.eye(8).astype(np.float32), np.eye(8).astype(np.float32)])
   @unittest.skipIf(CI and Device.DEFAULT in ["NV", "CL", "CUDA"] or (Device.DEFAULT == "CPU" and CPU_LLVM) or IMAGE
                    or (Device.DEFAULT == "WEBGPU" and platform.system() == "Windows"), "not supported on these in CI/IMAGE")
+  @unittest.skipIf(Device.DEFAULT == "QCOM", "not precise enough")
   def test_gemm_fp16(self):
     helper_test_op([(64,64), (64,64)], lambda x,y: x.half().matmul(y.half()), atol=5e-3, rtol=5e-3, grad_atol=5e-3, grad_rtol=5e-3)
   def test_gemm(self):
@@ -1723,6 +1731,7 @@ class TestOps(unittest.TestCase):
     helper_test_op([(7,5,10)], lambda x: x[1:5:2, 3, ::4])
     helper_test_op([(7,5,10)], lambda x: x[1:5:2, None, None, 3, None, ::4])
 
+  @unittest.skipIf(COMPILE_ONLY, "test requires runtime")
   def test_slice_negative_strides(self):
     # Torch doesn't support slicing with negative steps
     a = np.random.randn(10, 10, 10).astype(np.float32)
@@ -2752,6 +2761,7 @@ class TestOps(unittest.TestCase):
     n = Tensor([1, float("nan")]).max().numpy()
     assert math.isnan(n.item()), f"{n.item()} is not nan"
 
+  @unittest.skipIf(COMPILE_ONLY, "test requires runtime")
   def test_inf_where(self):
     x = Tensor.full((3, 3), float("inf"))
     n = (x < 0).where(x, 1).numpy()
@@ -3168,6 +3178,7 @@ class TestOps(unittest.TestCase):
 
   @unittest.skipIf((getenv("MOCKGPU") or Device.DEFAULT == "PYTHON"), "very slow on MOCKGPU because reduce does not fold")
   @unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu runtime issue")
+  @unittest.skipIf(Device.DEFAULT == "QCOM", "QCOM fails with: Resource deadlock avoided")
   def test_masked_select(self):
     helper_test_op([(32, 10)], lambda x: x.masked_select(x>0.5), lambda x: x.masked_select(x>0.5), forward_only=True)
     helper_test_op([(32, 10)], lambda x: x.masked_select(torch.tensor(True)), lambda x: x.masked_select(Tensor(True)), forward_only=True)