diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index adc22163d5..46230bf80b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -822,6 +822,7 @@ jobs: NV_PTX: 1 NV: 1 FORWARD_ONLY: 1 + IGNORE_OOB: 0 run: | python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20 - name: Run process replay tests diff --git a/tinygrad/uop/validate.py b/tinygrad/uop/validate.py index 23e8bd4c19..02552cb495 100644 --- a/tinygrad/uop/validate.py +++ b/tinygrad/uop/validate.py @@ -1,6 +1,6 @@ from typing import Callable, cast from tinygrad.uop.ops import PatternMatcher, UPat, GroupOp, Ops, UOp, python_alu -from tinygrad.dtype import ImageDType, dtypes, Invalid +from tinygrad.dtype import ImageDType, dtypes, Invalid, PtrDType from tinygrad.helpers import IGNORE_OOB, cpu_profile try: @@ -70,6 +70,9 @@ def validate_index(buf:UOp, idx:UOp, gate:UOp|None=None): # WEBGPU has a BITCAST in the index. TODO: fix if any(x.op is Ops.BITCAST for x in idx.toposort() | gate.toposort()): return True + # PTX uses absolute addresses (pointer cast to long), skip validation + if any(x.op is Ops.CAST and isinstance(x.src[0].dtype, PtrDType) for x in idx.toposort()): return True + if not z3_imported: raise ImportError("bounds checking requires z3 >= 4.12.4, use IGNORE_OOB=1 to disable, or \"pip install 'z3-solver>=4.12.4\"") solver = z3.Solver(ctx=z3.Context()) z3_idx, z3_mask = uops_to_z3(solver, idx, gate)