diff --git a/test/test_arange.py b/test/test_arange.py
index a21006ac62..7e91c39d2b 100644
--- a/test/test_arange.py
+++ b/test/test_arange.py
@@ -6,6 +6,7 @@ from tinygrad.engine.realize import run_schedule
 from tinygrad.codegen.opt import Opt, OptOps
 from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
 from tinygrad.uop.ops import Ops
+from tinygrad.renderer.ptx import PTXRenderer

 class TestArange(unittest.TestCase):
   def _get_flops(self, N, opts=None):
@@ -26,7 +27,7 @@ class TestArange(unittest.TestCase):
     print(f"{f1=}, {f2=}")
     # add 1 to avoid divide by 0. arange is 0 flops now!
     assert (f1 < 6000 and f2 < 6000) or ((f2+1) / (f1+1) < 16), f"bad complexity, flops {(f2+1) / (f1+1):.1f}X while inputs 10X"
-    if limit is not None and not getenv("PTX"):
+    if limit is not None and not isinstance(Device[Device.DEFAULT].renderer, PTXRenderer):
       # PTX counts index ALU in flops
       assert f1 <= limit, f"{f1=}, {limit=}"

diff --git a/test/test_dtype.py b/test/test_dtype.py
index 164d55b5b7..3f007783a1 100644
--- a/test/test_dtype.py
+++ b/test/test_dtype.py
@@ -5,6 +5,7 @@ from typing import Any, List
 from tinygrad.device import is_dtype_supported
 from tinygrad.helpers import getenv, DEBUG, CI
 from tinygrad.dtype import DType, DTYPES_DICT, least_upper_dtype, fp8_to_float, float_to_fp8, _to_np_dtype, _to_torch_dtype
+from tinygrad.renderer.ptx import PTXRenderer
 from tinygrad import Device, Tensor, dtypes
 from hypothesis import assume, given, settings, strategies as strat
 from test.helpers import rand_for_dtype
@@ -49,7 +50,7 @@ def _test_cast(a:Tensor, target_dtype:DType):
   _test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(_to_np_dtype(target_dtype))))

 def _test_bitcast(a:Tensor, target_dtype:DType, target=None):
-  if getenv("PTX") and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
+  if isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
     raise unittest.SkipTest("shape changing bitcast of int8 broken on PTX")
   expected = torch.tensor(a.tolist(), dtype=_to_torch_storage_type(a.dtype)).view(_to_torch_dtype(target_dtype))
   _test_op(lambda: a.bitcast(target_dtype), target_dtype, target or expected.tolist())
@@ -100,7 +101,7 @@ class TestDType(unittest.TestCase):
     ))

   @unittest.skipIf(Device.DEFAULT == "PYTHON", "skip for now")
-  @unittest.skipIf(getenv("PTX"), "skip for now")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "skip for now")
   def test_uint_overflow(self):
     if not dtypes.is_unsigned(self.DTYPE): raise unittest.SkipTest("only for unsigned")
     v = dtypes.max(self.DTYPE)
@@ -255,7 +256,8 @@ class TestFloatDType(TestDType):

 class TestDoubleDType(TestDType):
   DTYPE = dtypes.double
-  @unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or getenv("PTX"), "conversion not supported on CI CUDA and PTX") # TODO: why not?
+  @unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or \
+                   isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "conversion not supported on CI CUDA and PTX") # TODO: why not?
   def test_float64_increased_precision(self):
     for func in [
       lambda t: t.exp(),
@@ -279,21 +281,21 @@ class TestDoubleDType(TestDType):

 class TestInt8DType(TestDType):
   DTYPE = dtypes.int8
-  @unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
+  @unittest.skipIf(getenv("CUDA",0)==1 or isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "cuda saturation works differently")
   def test_int8_to_uint8_negative(self):
     _test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint8), dtypes.uint8, [255, 254, 253, 252])

   def test_int8_to_uint16_negative(self):
     _test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint16), dtypes.uint16, [2**16-1, 2**16-2, 2**16-3, 2**16-4])

-  @unittest.skipIf(getenv("PTX"), "broken in ptx")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken in ptx")
   def test_bitcast_alt(self):
     a = Tensor([72, -90, 27, 40, -53, 70, 96, 51], dtype=dtypes.int8).bitcast(dtypes.short)
     self.assertListEqual(a.tolist(), [-22968, 10267, 18123, 13152])

 class TestUint8DType(TestDType):
   DTYPE = dtypes.uint8
-  @unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
+  @unittest.skipIf(getenv("CUDA",0)==1 or isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "cuda saturation works differently")
   def test_uint8_to_int8_overflow(self):
     _test_op(lambda: Tensor([255, 254, 253, 252], dtype=dtypes.uint8).cast(dtypes.int8), dtypes.int8, [-1, -2, -3, -4])

@@ -301,7 +303,7 @@ class TestBitCast(unittest.TestCase):
   @given(strat.sampled_from(dtype_ints + dtype_floats), strat.sampled_from(dtype_ints + dtype_floats))
   def test_shape_change_bitcast(self, dt1, dt2):
     # NOTE: this has to be assume to prevent hypothesis from skipping all samples
-    assume(not (getenv("PTX") and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
+    assume(not (isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
     data = rand_for_dtype(dt1, 32).reshape(2, 2, 8)
     expected = torch.tensor(data.tolist(), dtype=_to_torch_storage_type(dt1)).view(_to_torch_dtype(dt2))
     _test_op(lambda: Tensor(data, dtype=dt1).bitcast(dt2), dt2, expected.tolist())

diff --git a/test/test_dtype_alu.py b/test/test_dtype_alu.py
index d1694bd58e..5f572c5559 100644
--- a/test/test_dtype_alu.py
+++ b/test/test_dtype_alu.py
@@ -5,6 +5,7 @@ from tinygrad.helpers import CI, getenv
 from tinygrad.tensor import _to_np_dtype
 from tinygrad.device import is_dtype_supported
 from tinygrad.runtime.ops_python import from_storage_scalar
+from tinygrad.renderer.ptx import PTXRenderer
 import numpy as np
 import pytest
 from hypothesis import given, strategies as strat, settings, HealthCheck
@@ -91,7 +92,7 @@ def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
   an, bn, cn = np.array([a]).astype(_to_np_dtype(d1)), np.array([b]).astype(_to_np_dtype(d1)), np.array([c]).astype(_to_np_dtype(d2))
   tensor_value = op2[0](op1[0](at, bt).cast(d2), ct).numpy()
   numpy_value = op2[1](op1[1](an, bn).astype(_to_np_dtype(d2)), cn)
-  np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if getenv("PTX") else 1e-7)
+  np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) else 1e-7)

 class TestDTypeALU(unittest.TestCase):
   @unittest.skipUnless(is_dtype_supported(dtypes.float64), f"no float64 on {Device.DEFAULT}")

diff --git a/test/test_linearizer.py b/test/test_linearizer.py
index c10ca110a4..203154196a 100644
--- a/test/test_linearizer.py
+++ b/test/test_linearizer.py
@@ -10,9 +10,10 @@ from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
 from tinygrad.tensor import Tensor, _to_np_dtype
 from tinygrad.engine.realize import run_schedule, lower_schedule, CompiledRunner, get_program
-from tinygrad.helpers import Context, getenv, flatten, dedup, TC_SELECT, TC_OPT
+from tinygrad.helpers import Context, flatten, dedup, TC_SELECT, TC_OPT
 from tinygrad.dtype import DType, dtypes, PtrDType, AddrSpace
 from tinygrad.codegen import apply_rewrites, rewrites_for_views
+from tinygrad.renderer.ptx import PTXRenderer

 class TestLinearizer(unittest.TestCase):
   def test_arg_dedup(self):
@@ -155,7 +156,7 @@ class TestLinearizer(unittest.TestCase):
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
-  @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
   def test_upcast_with_locals(self):
     x, y = Tensor.rand(1,128), Tensor.rand(128, 128)
     r = (x@y).relu()
@@ -366,7 +367,7 @@ class TestLinearizer(unittest.TestCase):
     helper(Tensor.arange(255), max_ops=2)

   @unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
-  @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
   def test_grouped_store_phis(self):
     """
     float4 acc0 = float4(0.0,0.0,0.0,0.0);
@@ -420,7 +421,7 @@ class TestLinearizer(unittest.TestCase):
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
-  @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
   def test_grouped_store_local_only(self):
     x, y = Tensor.rand(1,128), Tensor.rand(128, 128)
     r = (x@y).relu()

diff --git a/test/test_linearizer_dumb.py b/test/test_linearizer_dumb.py
index 23e534b4a7..8798837b08 100644
--- a/test/test_linearizer_dumb.py
+++ b/test/test_linearizer_dumb.py
@@ -6,10 +6,10 @@ import unittest
 from tinygrad import Device, dtypes
 from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import UOp, Ops, AxisType, KernelInfo
-from tinygrad.helpers import getenv
 from tinygrad.shape.shapetracker import ShapeTracker, View
 from tinygrad.codegen.opt.search import Opt, OptOps
 from tinygrad.engine.realize import get_program
+from tinygrad.renderer.ptx import PTXRenderer

 class TestLinearizerFailure(unittest.TestCase):
   @unittest.expectedFailure
@@ -93,7 +93,7 @@ class TestLinearizerDumb(unittest.TestCase):

   @unittest.expectedFailure
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "need float4")
-  @unittest.skipIf(getenv("PTX"), "this is somehow correct in PTX")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "this is somehow correct in PTX")
   def test_upcasted_stores_out_of_order(self):
     c0 = UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(9360), arg=0, src=())
     c1 = c0.view(ShapeTracker(views=(View(shape=(4, 5, 13, 1, 1, 1, 1, 1, 4, 3, 3), strides=(2340, 468, 36, 0, 0, 0, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=True),)))

diff --git a/test/test_randomness.py b/test/test_randomness.py
index 580c96abdf..cd025cc831 100644
--- a/test/test_randomness.py
+++ b/test/test_randomness.py
@@ -9,6 +9,7 @@ from tinygrad.device import is_dtype_supported
 from tinygrad.engine.realize import lower_schedule, CompiledRunner
 from hypothesis import given, settings, strategies as strat
 from test.helpers import not_support_multi_device
+from tinygrad.renderer.ptx import PTXRenderer

 settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 settings.load_profile("my_profile")
@@ -98,7 +99,7 @@ class TestRandomness(unittest.TestCase):

     np.testing.assert_allclose(jr, r)

-  @unittest.skipIf(getenv("PTX"), "fails with PTX")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "fails with PTX")
   def test_threefry_doesnt_use_long(self):
     for (_,ei) in lower_schedule(Tensor.rand(20).schedule()):
       if isinstance(ei.prg, CompiledRunner):

diff --git a/test/test_tensor.py b/test/test_tensor.py
index 902a22f041..94c2982f0d 100644
--- a/test/test_tensor.py
+++ b/test/test_tensor.py
@@ -9,7 +9,7 @@ from extra.gradcheck import numerical_jacobian, jacobian, gradcheck
 from hypothesis import given, settings, strategies as strat
 from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import Ops, UOp
-from tinygrad.runtime.support.compiler_cuda import PTX
+from tinygrad.renderer.ptx import PTXRenderer
 from tinygrad.codegen import full_rewrite
 from tinygrad.dtype import DType

@@ -915,7 +915,7 @@ class TestIdxUpcast(unittest.TestCase):
   def test_regular_sym(self):
     self.do_op_then_assert(dtypes.int, 2048, 2048, UOp.variable("dim3", 1, 64).bind(32))

-  @unittest.skipIf(PTX, "PTX always convert Ops.INDEX to int64")
+  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "PTX always convert Ops.INDEX to int64")
   def test_symfold(self):
     # This would cause an overflow, but after sym fold it's within int32
     a = Tensor.arange(65535)

diff --git a/test/test_uops.py b/test/test_uops.py
index c8c350b91f..64b9abe2de 100644
--- a/test/test_uops.py
+++ b/test/test_uops.py
@@ -15,6 +15,7 @@ from tinygrad.codegen import full_rewrite
 from tinygrad.uop.symbolic import sym
 from tinygrad.device import is_dtype_supported
 from tinygrad.codegen.opt import Opt, OptOps
+from tinygrad.renderer.ptx import PTXRenderer

 def to_uops_list(u:list[UOp], opts=None, skip_check=False) -> list[UOp]: return full_rewrite(UOp.sink(*u), opts)

@@ -130,9 +131,9 @@ class TestFloatUOps(TestUOps):

 class TestNonFloatUOps(TestUOps):
   def test_add_int32(self): self._test_bop_fxn(Ops.ADD, lambda a,b: int(a)+int(b), (dtypes.int32, dtypes.int32))
   def test_mul_int32(self): self._test_bop_fxn(Ops.MUL, lambda a,b: int(a)*int(b), (dtypes.int32, dtypes.int32))
-  @unittest.skipUnless(getenv("PTX"), "only ptx uses bitshifts")
+  @unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "only ptx uses bitshifts")
   def test_shr_int32(self): self._test_bop_fxn(Ops.SHR, lambda a,b: int(a)>>int(b), (dtypes.int32, dtypes.int32), no_b_neg=True)
-  @unittest.skipUnless(getenv("PTX"), "only ptx uses bitshifts")
+  @unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "only ptx uses bitshifts")
   def test_shl_int32(self): self._test_bop_fxn(Ops.SHL, lambda a,b: int(a)<<int(b), (dtypes.int32, dtypes.int32), no_b_neg=True)
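Note: every hunk above makes the same one-line change: a skip, tolerance, or assume that used to key off the `PTX` environment variable (`getenv("PTX")`) now keys off the type of the active renderer. As a minimal sketch of how the repeated check could be shared across the test files, here is a hypothetical helper; only `Device` and `PTXRenderer` come from this diff, and the name `is_ptx_renderer` is invented for illustration, not part of tinygrad:

```python
# Hypothetical helper (not in this diff): one place for the renderer check
# that each test file above currently inlines.
from tinygrad import Device
from tinygrad.renderer.ptx import PTXRenderer

def is_ptx_renderer() -> bool:
  # True when the default device's renderer is the PTX renderer, regardless
  # of whether it was selected via the PTX=1 environment variable.
  return isinstance(Device[Device.DEFAULT].renderer, PTXRenderer)
```

Each `@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), ...)` site would then read `@unittest.skipIf(is_ptx_renderer(), ...)`. The point of testing the renderer type rather than the env var is that the skips track whichever renderer is actually in use, not how it happened to be configured.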