do not use getenv('PTX') in tests (#12095)

* test without ptx

* fix tests

* fix test

* linters
This commit is contained in:
nimlgen
2025-09-10 14:04:07 +03:00
committed by GitHub
parent 0e420e68b4
commit 551560b87c
9 changed files with 30 additions and 22 deletions

View File

@@ -6,6 +6,7 @@ from tinygrad.engine.realize import run_schedule
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
from tinygrad.uop.ops import Ops
from tinygrad.renderer.ptx import PTXRenderer
class TestArange(unittest.TestCase):
def _get_flops(self, N, opts=None):
@@ -26,7 +27,7 @@ class TestArange(unittest.TestCase):
print(f"{f1=}, {f2=}")
# add 1 to avoid divide by 0. arange is 0 flops now!
assert (f1 < 6000 and f2 < 6000) or ((f2+1) / (f1+1) < 16), f"bad complexity, flops {(f2+1) / (f1+1):.1f}X while inputs 10X"
if limit is not None and not getenv("PTX"):
if limit is not None and not isinstance(Device[Device.DEFAULT].renderer, PTXRenderer):
# PTX counts index ALU in flops
assert f1 <= limit, f"{f1=}, {limit=}"

View File

@@ -5,6 +5,7 @@ from typing import Any, List
from tinygrad.device import is_dtype_supported
from tinygrad.helpers import getenv, DEBUG, CI
from tinygrad.dtype import DType, DTYPES_DICT, least_upper_dtype, fp8_to_float, float_to_fp8, _to_np_dtype, _to_torch_dtype
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad import Device, Tensor, dtypes
from hypothesis import assume, given, settings, strategies as strat
from test.helpers import rand_for_dtype
@@ -49,7 +50,7 @@ def _test_cast(a:Tensor, target_dtype:DType):
_test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(_to_np_dtype(target_dtype))))
def _test_bitcast(a:Tensor, target_dtype:DType, target=None):
if getenv("PTX") and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
if isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
raise unittest.SkipTest("shape changing bitcast of int8 broken on PTX")
expected = torch.tensor(a.tolist(), dtype=_to_torch_storage_type(a.dtype)).view(_to_torch_dtype(target_dtype))
_test_op(lambda: a.bitcast(target_dtype), target_dtype, target or expected.tolist())
@@ -100,7 +101,7 @@ class TestDType(unittest.TestCase):
))
@unittest.skipIf(Device.DEFAULT == "PYTHON", "skip for now")
@unittest.skipIf(getenv("PTX"), "skip for now")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "skip for now")
def test_uint_overflow(self):
if not dtypes.is_unsigned(self.DTYPE): raise unittest.SkipTest("only for unsigned")
v = dtypes.max(self.DTYPE)
@@ -255,7 +256,8 @@ class TestFloatDType(TestDType):
class TestDoubleDType(TestDType):
DTYPE = dtypes.double
@unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or getenv("PTX"), "conversion not supported on CI CUDA and PTX") # TODO: why not?
@unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or \
isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "conversion not supported on CI CUDA and PTX") # TODO: why not?
def test_float64_increased_precision(self):
for func in [
lambda t: t.exp(),
@@ -279,21 +281,21 @@ class TestDoubleDType(TestDType):
class TestInt8DType(TestDType):
DTYPE = dtypes.int8
@unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
@unittest.skipIf(getenv("CUDA",0)==1 or isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "cuda saturation works differently")
def test_int8_to_uint8_negative(self):
_test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint8), dtypes.uint8, [255, 254, 253, 252])
def test_int8_to_uint16_negative(self):
_test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint16), dtypes.uint16, [2**16-1, 2**16-2, 2**16-3, 2**16-4])
@unittest.skipIf(getenv("PTX"), "broken in ptx")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken in ptx")
def test_bitcast_alt(self):
a = Tensor([72, -90, 27, 40, -53, 70, 96, 51], dtype=dtypes.int8).bitcast(dtypes.short)
self.assertListEqual(a.tolist(), [-22968, 10267, 18123, 13152])
class TestUint8DType(TestDType):
DTYPE = dtypes.uint8
@unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
@unittest.skipIf(getenv("CUDA",0)==1 or isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "cuda saturation works differently")
def test_uint8_to_int8_overflow(self):
_test_op(lambda: Tensor([255, 254, 253, 252], dtype=dtypes.uint8).cast(dtypes.int8), dtypes.int8, [-1, -2, -3, -4])
@@ -301,7 +303,7 @@ class TestBitCast(unittest.TestCase):
@given(strat.sampled_from(dtype_ints + dtype_floats), strat.sampled_from(dtype_ints + dtype_floats))
def test_shape_change_bitcast(self, dt1, dt2):
# NOTE: this has to be an assume() call to prevent hypothesis from skipping all samples
assume(not (getenv("PTX") and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
assume(not (isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
data = rand_for_dtype(dt1, 32).reshape(2, 2, 8)
expected = torch.tensor(data.tolist(), dtype=_to_torch_storage_type(dt1)).view(_to_torch_dtype(dt2))
_test_op(lambda: Tensor(data, dtype=dt1).bitcast(dt2), dt2, expected.tolist())

View File

@@ -5,6 +5,7 @@ from tinygrad.helpers import CI, getenv
from tinygrad.tensor import _to_np_dtype
from tinygrad.device import is_dtype_supported
from tinygrad.runtime.ops_python import from_storage_scalar
from tinygrad.renderer.ptx import PTXRenderer
import numpy as np
import pytest
from hypothesis import given, strategies as strat, settings, HealthCheck
@@ -91,7 +92,7 @@ def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
an, bn, cn = np.array([a]).astype(_to_np_dtype(d1)), np.array([b]).astype(_to_np_dtype(d1)), np.array([c]).astype(_to_np_dtype(d2))
tensor_value = op2[0](op1[0](at, bt).cast(d2), ct).numpy()
numpy_value = op2[1](op1[1](an, bn).astype(_to_np_dtype(d2)), cn)
np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if getenv("PTX") else 1e-7)
np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if isinstance(Device[Device.DEFAULT].renderer, PTXRenderer) else 1e-7)
class TestDTypeALU(unittest.TestCase):
@unittest.skipUnless(is_dtype_supported(dtypes.float64), f"no float64 on {Device.DEFAULT}")

View File

@@ -10,9 +10,10 @@ from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.view import View
from tinygrad.tensor import Tensor, _to_np_dtype
from tinygrad.engine.realize import run_schedule, lower_schedule, CompiledRunner, get_program
from tinygrad.helpers import Context, getenv, flatten, dedup, TC_SELECT, TC_OPT
from tinygrad.helpers import Context, flatten, dedup, TC_SELECT, TC_OPT
from tinygrad.dtype import DType, dtypes, PtrDType, AddrSpace
from tinygrad.codegen import apply_rewrites, rewrites_for_views
from tinygrad.renderer.ptx import PTXRenderer
class TestLinearizer(unittest.TestCase):
def test_arg_dedup(self):
@@ -155,7 +156,7 @@ class TestLinearizer(unittest.TestCase):
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
@unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
def test_upcast_with_locals(self):
x, y = Tensor.rand(1,128), Tensor.rand(128, 128)
r = (x@y).relu()
@@ -366,7 +367,7 @@ class TestLinearizer(unittest.TestCase):
helper(Tensor.arange(255), max_ops=2)
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
@unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
def test_grouped_store_phis(self):
"""
float4 acc0 = float4(0.0,0.0,0.0,0.0);
@@ -420,7 +421,7 @@ class TestLinearizer(unittest.TestCase):
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "test requires float4")
@unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "broken on ptx for some reason")
def test_grouped_store_local_only(self):
x, y = Tensor.rand(1,128), Tensor.rand(128, 128)
r = (x@y).relu()

View File

@@ -6,10 +6,10 @@ import unittest
from tinygrad import Device, dtypes
from tinygrad.device import is_dtype_supported
from tinygrad.uop.ops import UOp, Ops, AxisType, KernelInfo
from tinygrad.helpers import getenv
from tinygrad.shape.shapetracker import ShapeTracker, View
from tinygrad.codegen.opt.search import Opt, OptOps
from tinygrad.engine.realize import get_program
from tinygrad.renderer.ptx import PTXRenderer
class TestLinearizerFailure(unittest.TestCase):
@unittest.expectedFailure
@@ -93,7 +93,7 @@ class TestLinearizerDumb(unittest.TestCase):
@unittest.expectedFailure
@unittest.skipUnless(Device[Device.DEFAULT].renderer.supports_float4, "need float4")
@unittest.skipIf(getenv("PTX"), "this is somehow correct in PTX")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "this is somehow correct in PTX")
def test_upcasted_stores_out_of_order(self):
c0 = UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(9360), arg=0, src=())
c1 = c0.view(ShapeTracker(views=(View(shape=(4, 5, 13, 1, 1, 1, 1, 1, 4, 3, 3), strides=(2340, 468, 36, 0, 0, 0, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=True),)))

View File

@@ -9,6 +9,7 @@ from tinygrad.device import is_dtype_supported
from tinygrad.engine.realize import lower_schedule, CompiledRunner
from hypothesis import given, settings, strategies as strat
from test.helpers import not_support_multi_device
from tinygrad.renderer.ptx import PTXRenderer
settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
settings.load_profile("my_profile")
@@ -98,7 +99,7 @@ class TestRandomness(unittest.TestCase):
np.testing.assert_allclose(jr, r)
@unittest.skipIf(getenv("PTX"), "fails with PTX")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "fails with PTX")
def test_threefry_doesnt_use_long(self):
for (_,ei) in lower_schedule(Tensor.rand(20).schedule()):
if isinstance(ei.prg, CompiledRunner):

View File

@@ -9,7 +9,7 @@ from extra.gradcheck import numerical_jacobian, jacobian, gradcheck
from hypothesis import given, settings, strategies as strat
from tinygrad.device import is_dtype_supported
from tinygrad.uop.ops import Ops, UOp
from tinygrad.runtime.support.compiler_cuda import PTX
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.codegen import full_rewrite
from tinygrad.dtype import DType
@@ -915,7 +915,7 @@ class TestIdxUpcast(unittest.TestCase):
def test_regular_sym(self):
self.do_op_then_assert(dtypes.int, 2048, 2048, UOp.variable("dim3", 1, 64).bind(32))
@unittest.skipIf(PTX, "PTX always convert Ops.INDEX to int64")
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "PTX always convert Ops.INDEX to int64")
def test_symfold(self):
# This would cause an overflow, but after sym fold it's within int32
a = Tensor.arange(65535)

View File

@@ -15,6 +15,7 @@ from tinygrad.codegen import full_rewrite
from tinygrad.uop.symbolic import sym
from tinygrad.device import is_dtype_supported
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.renderer.ptx import PTXRenderer
def to_uops_list(u:list[UOp], opts=None, skip_check=False) -> list[UOp]: return full_rewrite(UOp.sink(*u), opts)
@@ -130,9 +131,9 @@ class TestFloatUOps(TestUOps):
class TestNonFloatUOps(TestUOps):
def test_add_int32(self): self._test_bop_fxn(Ops.ADD, lambda a,b: int(a)+int(b), (dtypes.int32, dtypes.int32))
def test_mul_int32(self): self._test_bop_fxn(Ops.MUL, lambda a,b: int(a)*int(b), (dtypes.int32, dtypes.int32))
@unittest.skipUnless(getenv("PTX"), "only ptx uses bitshifts")
@unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "only ptx uses bitshifts")
def test_shr_int32(self): self._test_bop_fxn(Ops.SHR, lambda a,b: int(a)>>int(b), (dtypes.int32, dtypes.int32), no_b_neg=True)
@unittest.skipUnless(getenv("PTX"), "only ptx uses bitshifts")
@unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "only ptx uses bitshifts")
def test_shl_int32(self): self._test_bop_fxn(Ops.SHL, lambda a,b: int(a)<<int(b), (dtypes.int32, dtypes.int32), no_b_neg=True)
def test_div_int32(self):
self._test_bop_fxn(Ops.IDIV, lambda a,b: int(a/b), (dtypes.int32, dtypes.int32), no_b_zero=True)
@@ -370,7 +371,7 @@ class TestLocalAccess(unittest.TestCase):
sres = uop(uops, Ops.LOAD, dtypes.int32, (smem.index(ofs),))
self.assertEqual(_test_uops_result(dtypes.int32, uops, sres), 42)
@unittest.skipUnless(getenv("PTX"), "This only tests assembly backends")
@unittest.skipUnless(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "This only tests assembly backends")
class TestAssembly(unittest.TestCase):
def test_bitshift_left(self):
g1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)

View File

@@ -8,6 +8,7 @@ from tinygrad.uop.ops import Ops, UOp
from tinygrad.dtype import dtypes
from tinygrad.codegen.opt import Opt, OptOps, KernelOptError
from tinygrad.device import Device
from tinygrad.renderer.ptx import PTXRenderer
def flops_mem(uops, ignore_indexing=False):
est = Estimates.from_uops(uops, ignore_indexing)
@@ -158,7 +159,7 @@ class TestUOpsStats(unittest.TestCase):
self.assertEqual(flops_mem(uops), flops_mem(uops_fma))
N = 64
@unittest.skipIf(getenv("PTX"), "wrong in PTX") # maybe?
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, PTXRenderer), "wrong in PTX") # maybe?
class TestStatsOptimized(unittest.TestCase):
@classmethod
def setUpClass(cls):