mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
upcast overflowed idx to int64 [pr] (#8268)
* use full_shape to determine if index can potentially overflow * update comment * use shapetracker to check max index value * wip * lint * handle mask * upcast to int64 by st is noop on WGSL * fix comments * Handle negative overflow, intermediaries overflow, int64 support handle negative overflow handle symbolic wip handle intermediate values wip check if typemap support int64 lint comment * add invalid_dtype lint * Fix bug on checking mask overflow wip wip * Add more tests, need to resolve partial upcast test Valid_view_dup test valid op overflow refine test cases clean up cleanup wip refine tests lint * Upcast is handled by lower_load_store upcast as graph_rewrite to backtrack update test wip cleanup wip cleanup do upcast in lower_load_store lint * cleanup * do upcast within lower_load_store and mutate ctx * do upcast in get_idx and view revert lint * cleanup * Upcast in vec, const upcast to const test case 3 upcast on vector lint * simplify idx with symbolic in case of fake overflow test case4 test case 4 update test * test case4 is only for metal * try: upcast inside graph_rewrite instead of shapetracker wip * checking overflow can just be done directly on all views, with idxs * cleanup * REMOVE hard coded uop test for idx upcast * refactor cleanup refactor * do actual casting when necessary, instead of rewriting all idx hard code uop test new upcast * check dtype for int64 in webgpu * cleanup cleanup * cleanup * update tests cleanup comment cleanup cleanup * comment * comment * update comment update comment * refactor * typo * keep the scope to only upcasting * white space * Revert "white space" This reverts commit314d7eb184. * Revert "keep the scope to only upcasting" This reverts commit1ef701dd85. * sym folding is not necessary lint1 * fold symbolic lint * use symbolic simple when folding shapetracker idx * full sym folding is required after all... 
* Ops.CAST should retain the src min max * put rewrite to lowerer wip * start testing on higher level wip test higher level in test_tensor * find Ops.STORE in list instead of recursively * check dtype support when upcasting * remove invalid_dtype * lint * fix int64 support checks in upcast lint * skipif skipunless * revert fold to find test case * Revert "revert fold to find test case" This reverts commit225bb6e801. * test sym folding * handle ptx * wip * wip * delete hard coded uop test * lint fixes * wip * fix checking for None * lint * handle ptx * comment * dtype for overflow() * update skipIf skipUnless * assert in wgsl renderer for int64 wip * do folded_upcast in to_indexed_op, real_size uses views_to_indexed_ops * assert in lowerer for dtype support lint * Revert "assert in lowerer for dtype support" This reverts commit8e9b1b79bf. * assert dtype in kernel.py * Revert "assert dtype in kernel.py" This reverts commite29b9a9893. * wip * assert in render * remove old assert * check dtype from rendere, assert in upcast wip * smaller arange for sym fold case * linearize directly * use expand directly * lint * lint * rename * no need to check dtype in device.py * trigger pr * remove dtype assert in upcast, make wgpu fail in render * use DType for type hint instead of dtypes * assert on KeyError in tests for webgpu backend int64 * use a tuple for src * test real kernel run wip * lint error * restore * fix real_size * update test example * resolve merge stuff --------- Co-authored-by: Mesozoic Egg <mesozoic.egg@proton.mail>
This commit is contained in:
@@ -7,6 +7,12 @@ from tinygrad.helpers import getenv, temp, _METADATA, mv_address
|
||||
from extra.gradcheck import numerical_jacobian, jacobian, gradcheck
|
||||
from hypothesis import given, settings, strategies as strat
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.ops import Ops, UOp
|
||||
from tinygrad.runtime.support.compiler_cuda import PTX
|
||||
from tinygrad.codegen.linearize import linearize_uop
|
||||
from tinygrad.codegen.uopgraph import full_graph_rewrite
|
||||
from tinygrad.codegen.lowerer import rewrite_shapetracker_with_index
|
||||
from tinygrad.dtype import DType
|
||||
|
||||
settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
|
||||
settings.load_profile("my_profile")
|
||||
@@ -773,5 +779,73 @@ class TestTensorMetadata(unittest.TestCase):
|
||||
self.assertEqual(len(bw), 1)
|
||||
self.assertEqual(bw[0].name, "sigmoid")
|
||||
|
||||
class TestIdxUpcast(unittest.TestCase):
  """Tests that buffer INDEX computations are upcast to int64 only when the index
  value can actually overflow int32, and stay int32 otherwise (tinygrad PR #8268)."""

  def _find_op(self, ast: UOp, op: Ops):
    # Pre-order DFS: return the first UOp under `ast` whose .op is `op`, or None if absent.
    if ast.op is op: return ast
    for src in ast.src:
      if (ret:=self._find_op(src, op)) is not None: return ret

  def _schedule_render(self, a: Tensor):
    # Lower each SINK kernel in `a`'s schedule through the codegen pipeline
    # (shapetracker rewrite -> full graph rewrite -> linearize), then render it —
    # rendering lets backends without int64 support raise. Returns the uops of
    # the rendered SINK.
    # NOTE(review): if the schedule contained no SINK this would raise NameError
    # on `uops`; presumably every schedule built by these tests has one — confirm.
    schedule, _ = a.schedule_with_vars()
    for s in schedule:
      if s.ast.op is Ops.SINK:
        renderer = Device[s.bufs[0].device].renderer
        uops = linearize_uop(full_graph_rewrite(rewrite_shapetracker_with_index(s.ast, renderer), renderer))
        renderer.render("test", uops)
    return uops

  def _assert(self, dtype: DType, a: Tensor):
    # Schedule/render `a` and assert the dtype of the STORE's INDEX operand is `dtype`.
    uops = self._schedule_render(a)
    # Assert the dtype of the INDEX value. This will need to be updated if the UOp spec changes.
    store = next(uop for uop in uops if uop.op is Ops.STORE)
    assert store.op is Ops.STORE
    idx = self._find_op(store, Ops.INDEX)
    if idx is not None: # PTX turns Ops.INDEX into pointer arithmetic earlier than cstyle, plus it's already cast to int64
      assert idx.op is Ops.INDEX
      idx_val = idx.src[1]  # src[1] of INDEX is the index expression whose dtype is under test
      assert idx_val.dtype is dtype

  # use expand to generate a kernel whose flat index spans dim1*dim2*dim3 elements
  def do_op_then_assert(self, dtype: DType, dim1, dim2, dim3):
    self._assert(dtype, Tensor.empty(dim1, dim2, 1).expand(-1, -1, dim3).contiguous())

  # NOTE(review): the skipUnless reason "int64 is supported" is displayed precisely
  # when int64 is NOT supported — the wording is misleading (logic is correct).
  @unittest.skipUnless(is_dtype_supported(dtypes.long), "int64 is supported")
  def test_overflow(self):
    # 2**11 * 2**11 * 2**11 -> 2**33 will overflow int32 when indexed
    self.do_op_then_assert(dtypes.long, 2048, 2048, 2048)

  @unittest.skipUnless(is_dtype_supported(dtypes.long), "int64 is supported")
  def test_overflow_sym(self):
    # symbolic dim3 bounded by 2048: worst case is again 2**33, so index must be int64
    self.do_op_then_assert(dtypes.long, 2048, 2048, UOp.variable("dim3", 0, 2048).bind(32))

  def test_regular(self):
    # 64**3 = 2**18 fits in int32, index stays int
    self.do_op_then_assert(dtypes.int, 64, 64, 64)

  def test_regular_sym(self):
    # symbolic dim3 bounded by 64: max index 2048*2048*64 = 2**28 still fits in int32
    self.do_op_then_assert(dtypes.int, 2048, 2048, UOp.variable("dim3", 0, 64).bind(32))

  @unittest.skipIf(PTX, "PTX always convert Ops.INDEX to int64")
  def test_symfold(self):
    # This would cause an overflow, but after sym fold it's within int32
    a = Tensor.arange(65535)
    uops = self._schedule_render(a)
    assert all(uop.dtype is not dtypes.long for uop in uops)

  @unittest.skipIf(is_dtype_supported(dtypes.long), "int64 is supported")
  def test_int64_unsupported_overflow_sym(self):
    # on a backend without int64, upcasting the overflowing index must fail
    # (KeyError — presumably from the renderer's dtype map; confirm against backend)
    with self.assertRaises(KeyError):
      self.do_op_then_assert(dtypes.long, 2048, 2048, UOp.variable("dim3", 0, 2048).bind(32))

  @unittest.skipIf(is_dtype_supported(dtypes.long), "int64 is supported")
  def test_int64_unsupported_overflow(self):
    # same as above with concrete dims: 2**33 elements overflow int32
    with self.assertRaises(KeyError):
      self.do_op_then_assert(dtypes.long, 2048, 2048, 2048)

  @unittest.skip("This is kept for reference, it requires large memory to run")
  def test_overflow_kernel_run(self):
    # This creates (2**9+10) * 2**11 * 2**11 ≈ 2**31 int8 elements, requiring at least ~2147 MB to run
    # Modified example from issue 3271
    a = Tensor.empty(2**11, 2**11, 1, dtype=dtypes.int8).permute((2, 0, 1)).expand((2**9+10, -1, -1)).contiguous()
    a.realize()
|
||||
|
||||
# Script entry point: run the test suite when executed directly.
if __name__ == "__main__":
  unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user