mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
do not use getenv('PTX') in tests (#12095)
* test without ptx * fix tests * fix test * linters
This commit is contained in:
@@ -6,6 +6,7 @@ from tinygrad.engine.realize import run_schedule
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
|
||||
from tinygrad.uop.ops import Ops
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
|
||||
class TestArange(unittest.TestCase):
|
||||
def _get_flops(self, N, opts=None):
|
||||
@@ -26,7 +27,7 @@ class TestArange(unittest.TestCase):
|
||||
print(f"{f1=}, {f2=}")
|
||||
# add 1 to avoid divide by 0. arange is 0 flops now!
|
||||
assert (f1 < 6000 and f2 < 6000) or ((f2+1) / (f1+1) < 16), f"bad complexity, flops {(f2+1) / (f1+1):.1f}X while inputs 10X"
|
||||
if limit is not None and not getenv("PTX"):
|
||||
if limit is not None and not isinstance(Device[Device.DEFAULT].renderer, PTXRenderer):
|
||||
# PTX counts index ALU in flops
|
||||
assert f1 <= limit, f"{f1=}, {limit=}"
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import Any, List
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.helpers import getenv, DEBUG, CI
|
||||
from tinygrad.dtype import DType, DTYPES_DICT, least_upper_dtype, fp8_to_float, float_to_fp8, _to_np_dtype, _to_torch_dtype
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
from tinygrad import Device, Tensor, dtypes
|
||||
from hypothesis import assume, given, settings, strategies as strat
|
||||
from test.helpers import rand_for_dtype
|
||||
@@ -49,7 +50,7 @@ def _test_cast(a:Tensor, target_dtype:DType):
|
||||
|
||||
_test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(_to_np_dtype(target_dtype))))
|
||||
def _test_bitcast(a:Tensor, target_dtype:DType, target=None):
|
||||
if getenv("PTX") and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
|
||||
if isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
|
||||
raise unittest.SkipTest("shape changing bitcast of int8 broken on PTX")
|
||||
expected = torch.tensor(a.tolist(), dtype=_to_torch_storage_type(a.dtype)).view(_to_torch_dtype(target_dtype))
|
||||
_test_op(lambda: a.bitcast(target_dtype), target_dtype, target or expected.tolist())
|
||||
@@ -100,7 +101,7 @@ class TestDType(unittest.TestCase):
|
||||
))
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT == "PYTHON", "skip for now")
|
||||
@unittest.skipIf(getenv("PTX"), "skip for now")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "skip for now")
|
||||
def test_uint_overflow(self):
|
||||
if not dtypes.is_unsigned(self.DTYPE): raise unittest.SkipTest("only for unsigned")
|
||||
v = dtypes.max(self.DTYPE)
|
||||
@@ -255,7 +256,8 @@ class TestFloatDType(TestDType):
|
||||
|
||||
class TestDoubleDType(TestDType):
|
||||
DTYPE = dtypes.double
|
||||
@unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or getenv("PTX"), "conversion not supported on CI CUDA and PTX") # TODO: why not?
|
||||
@unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or \
|
||||
isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "conversion not supported on CI CUDA and PTX") # TODO: why not?
|
||||
def test_float64_increased_precision(self):
|
||||
for func in [
|
||||
lambda t: t.exp(),
|
||||
@@ -279,21 +281,21 @@ class TestDoubleDType(TestDType):
|
||||
|
||||
class TestInt8DType(TestDType):
|
||||
DTYPE = dtypes.int8
|
||||
@unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
|
||||
@unittest.skipIf(getenv("CUDA",0)==1 or isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "cuda saturation works differently")
|
||||
def test_int8_to_uint8_negative(self):
|
||||
_test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint8), dtypes.uint8, [255, 254, 253, 252])
|
||||
|
||||
def test_int8_to_uint16_negative(self):
|
||||
_test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint16), dtypes.uint16, [2**16-1, 2**16-2, 2**16-3, 2**16-4])
|
||||
|
||||
@unittest.skipIf(getenv("PTX"), "broken in ptx")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken in ptx")
|
||||
def test_bitcast_alt(self):
|
||||
a = Tensor([72, -90, 27, 40, -53, 70, 96, 51], dtype=dtypes.int8).bitcast(dtypes.short)
|
||||
self.assertListEqual(a.tolist(), [-22968, 10267, 18123, 13152])
|
||||
|
||||
class TestUint8DType(TestDType):
|
||||
DTYPE = dtypes.uint8
|
||||
@unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
|
||||
@unittest.skipIf(getenv("CUDA",0)==1 or isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "cuda saturation works differently")
|
||||
def test_uint8_to_int8_overflow(self):
|
||||
_test_op(lambda: Tensor([255, 254, 253, 252], dtype=dtypes.uint8).cast(dtypes.int8), dtypes.int8, [-1, -2, -3, -4])
|
||||
|
||||
@@ -301,7 +303,7 @@ class TestBitCast(unittest.TestCase):
|
||||
@given(strat.sampled_from(dtype_ints + dtype_floats), strat.sampled_from(dtype_ints + dtype_floats))
|
||||
def test_shape_change_bitcast(self, dt1, dt2):
|
||||
# NOTE: this has to be assume to prevent hypothesis from skipping all samples
|
||||
assume(not (getenv("PTX") and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
|
||||
assume(not (isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
|
||||
data = rand_for_dtype(dt1, 32).reshape(2, 2, 8)
|
||||
expected = torch.tensor(data.tolist(), dtype=_to_torch_storage_type(dt1)).view(_to_torch_dtype(dt2))
|
||||
_test_op(lambda: Tensor(data, dtype=dt1).bitcast(dt2), dt2, expected.tolist())
|
||||
|
||||
@@ -5,6 +5,7 @@ from tinygrad.helpers import CI, getenv
|
||||
from tinygrad.tensor import _to_np_dtype
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.runtime.ops_python import from_storage_scalar
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, strategies as strat, settings, HealthCheck
|
||||
@@ -91,7 +92,7 @@ def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
|
||||
an, bn, cn = np.array([a]).astype(_to_np_dtype(d1)), np.array([b]).astype(_to_np_dtype(d1)), np.array([c]).astype(_to_np_dtype(d2))
|
||||
tensor_value = op2[0](op1[0](at, bt).cast(d2), ct).numpy()
|
||||
numpy_value = op2[1](op1[1](an, bn).astype(_to_np_dtype(d2)), cn)
|
||||
np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if getenv("PTX") else 1e-7)
|
||||
np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) else 1e-7)
|
||||
|
||||
class TestDTypeALU(unittest.TestCase):
|
||||
@unittest.skipUnless(is_dtype_supported(dtypes.float64), f"no float64 on {Device.DEFAULT}")
|
||||
|
||||
@@ -10,9 +10,10 @@ from tinygrad.shape.shapetracker import ShapeTracker
|
||||
from tinygrad.shape.view import View
|
||||
from tinygrad.tensor import Tensor, _to_np_dtype
|
||||
from tinygrad.engine.realize import run_schedule, lower_schedule, CompiledRunner, get_program
|
||||
from tinygrad.helpers import Context, getenv, flatten, dedup, TC_SELECT, TC_OPT
|
||||
from tinygrad.helpers import Context, flatten, dedup, TC_SELECT, TC_OPT
|
||||
from tinygrad.dtype import DType, dtypes, PtrDType, AddrSpace
|
||||
from tinygrad.codegen import apply_rewrites, rewrites_for_views
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
|
||||
class TestLinearizer(unittest.TestCase):
|
||||
def test_arg_dedup(self):
|
||||
@@ -155,7 +156,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
|
||||
@unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
|
||||
def test_upcast_with_locals(self):
|
||||
x, y = Tensor.rand(1,128), Tensor.rand(128, 128)
|
||||
r = (x@y).relu()
|
||||
@@ -366,7 +367,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
helper(Tensor.arange(255), max_ops=2)
|
||||
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
|
||||
@unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
|
||||
def test_grouped_store_phis(self):
|
||||
"""
|
||||
float4 acc0 = float4(0.0,0.0,0.0,0.0);
|
||||
@@ -420,7 +421,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
|
||||
@unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
|
||||
def test_grouped_store_local_only(self):
|
||||
x, y = Tensor.rand(1,128), Tensor.rand(128, 128)
|
||||
r = (x@y).relu()
|
||||
|
||||
@@ -6,10 +6,10 @@ import unittest
|
||||
from tinygrad import Device, dtypes
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.uop.ops import UOp, Ops, AxisType, KernelInfo
|
||||
from tinygrad.helpers import getenv
|
||||
from tinygrad.shape.shapetracker import ShapeTracker, View
|
||||
from tinygrad.codegen.opt.search import Opt, OptOps
|
||||
from tinygrad.engine.realize import get_program
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
|
||||
class TestLinearizerFailure(unittest.TestCase):
|
||||
@unittest.expectedFailure
|
||||
@@ -93,7 +93,7 @@ class TestLinearizerDumb(unittest.TestCase):
|
||||
|
||||
@unittest.expectedFailure
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "need float4")
|
||||
@unittest.skipIf(getenv("PTX"), "this is somehow correct in PTX")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "this is somehow correct in PTX")
|
||||
def test_upcasted_stores_out_of_order(self):
|
||||
c0 = UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(9360), arg=0, src=())
|
||||
c1 = c0.view(ShapeTracker(views=(View(shape=(4, 5, 13, 1, 1, 1, 1, 1, 4, 3, 3), strides=(2340, 468, 36, 0, 0, 0, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=True),)))
|
||||
|
||||
@@ -9,6 +9,7 @@ from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.engine.realize import lower_schedule, CompiledRunner
|
||||
from hypothesis import given, settings, strategies as strat
|
||||
from test.helpers import not_support_multi_device
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
|
||||
settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
|
||||
settings.load_profile("my_profile")
|
||||
@@ -98,7 +99,7 @@ class TestRandomness(unittest.TestCase):
|
||||
|
||||
np.testing.assert_allclose(jr, r)
|
||||
|
||||
@unittest.skipIf(getenv("PTX"), "fails with PTX")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "fails with PTX")
|
||||
def test_threefry_doesnt_use_long(self):
|
||||
for (_,ei) in lower_schedule(Tensor.rand(20).schedule()):
|
||||
if isinstance(ei.prg, CompiledRunner):
|
||||
|
||||
@@ -9,7 +9,7 @@ from extra.gradcheck import numerical_jacobian, jacobian, gradcheck
|
||||
from hypothesis import given, settings, strategies as strat
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.uop.ops import Ops, UOp
|
||||
from tinygrad.runtime.support.compiler_cuda import PTX
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
from tinygrad.codegen import full_rewrite
|
||||
from tinygrad.dtype import DType
|
||||
|
||||
@@ -915,7 +915,7 @@ class TestIdxUpcast(unittest.TestCase):
|
||||
def test_regular_sym(self):
|
||||
self.do_op_then_assert(dtypes.int, 2048, 2048, UOp.variable("dim3", 1, 64).bind(32))
|
||||
|
||||
@unittest.skipIf(PTX, "PTX always convert Ops.INDEX to int64")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "PTX always convert Ops.INDEX to int64")
|
||||
def test_symfold(self):
|
||||
# This would cause an overflow, but after sym fold it's within int32
|
||||
a = Tensor.arange(65535)
|
||||
|
||||
@@ -15,6 +15,7 @@ from tinygrad.codegen import full_rewrite
|
||||
from tinygrad.uop.symbolic import sym
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
|
||||
def to_uops_list(u:list[UOp], opts=None, skip_check=False) -> list[UOp]: return full_rewrite(UOp.sink(*u), opts)
|
||||
|
||||
@@ -130,9 +131,9 @@ class TestFloatUOps(TestUOps):
|
||||
class TestNonFloatUOps(TestUOps):
|
||||
def test_add_int32(self): self._test_bop_fxn(Ops.ADD, lambda a,b: int(a)+int(b), (dtypes.int32, dtypes.int32))
|
||||
def test_mul_int32(self): self._test_bop_fxn(Ops.MUL, lambda a,b: int(a)*int(b), (dtypes.int32, dtypes.int32))
|
||||
@unittest.skipUnless(getenv("PTX"), "only ptx uses bitshifts")
|
||||
@unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "only ptx uses bitshifts")
|
||||
def test_shr_int32(self): self._test_bop_fxn(Ops.SHR, lambda a,b: int(a)>>int(b), (dtypes.int32, dtypes.int32), no_b_neg=True)
|
||||
@unittest.skipUnless(getenv("PTX"), "only ptx uses bitshifts")
|
||||
@unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "only ptx uses bitshifts")
|
||||
def test_shl_int32(self): self._test_bop_fxn(Ops.SHL, lambda a,b: int(a)<<int(b), (dtypes.int32, dtypes.int32), no_b_neg=True)
|
||||
def test_div_int32(self):
|
||||
self._test_bop_fxn(Ops.IDIV, lambda a,b: int(a/b), (dtypes.int32, dtypes.int32), no_b_zero=True)
|
||||
@@ -370,7 +371,7 @@ class TestLocalAccess(unittest.TestCase):
|
||||
sres = uop(uops, Ops.LOAD, dtypes.int32, (smem.index(ofs),))
|
||||
self.assertEqual(_test_uops_result(dtypes.int32, uops, sres), 42)
|
||||
|
||||
@unittest.skipUnless(getenv("PTX"), "This only tests assembly backends")
|
||||
@unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "This only tests assembly backends")
|
||||
class TestAssembly(unittest.TestCase):
|
||||
def test_bitshift_left(self):
|
||||
g1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)
|
||||
|
||||
@@ -8,6 +8,7 @@ from tinygrad.uop.ops import Ops, UOp
|
||||
from tinygrad.dtype import dtypes
|
||||
from tinygrad.codegen.opt import Opt, OptOps, KernelOptError
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
|
||||
def flops_mem(uops, ignore_indexing=False):
|
||||
est = Estimates.from_uops(uops, ignore_indexing)
|
||||
@@ -158,7 +159,7 @@ class TestUOpsStats(unittest.TestCase):
|
||||
self.assertEqual(flops_mem(uops), flops_mem(uops_fma))
|
||||
|
||||
N = 64
|
||||
@unittest.skipIf(getenv("PTX"), "wrong in PTX") # maybe?
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "wrong in PTX") # maybe?
|
||||
class TestStatsOptimized(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
|
||||
Reference in New Issue
Block a user