diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5fb815d7ed..3cbb1c67e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -197,7 +197,7 @@ jobs: - if: ${{ matrix.task == 'optimage' }} name: Test openpilot model compile and size run: | - PYTHONPATH="." DEBUG=2 ALLOWED_KERNEL_COUNT=208 ALLOWED_GATED_READ_IMAGE=356 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py + PYTHONPATH="." DEBUG=2 ALLOWED_KERNEL_COUNT=208 ALLOWED_GATED_READ_IMAGE=354 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000' - if: ${{ matrix.task == 'optimage' }} name: Test openpilot model correctness (float32) diff --git a/test/unit/test_image_valid.py b/test/unit/test_image_valid.py index afbf4cd2e3..6ed9cca560 100644 --- a/test/unit/test_image_valid.py +++ b/test/unit/test_image_valid.py @@ -61,6 +61,18 @@ class TestValidSimplification(unittest.TestCase): self.assertEqual(render((10, 10, 4), (gidx1).lt(5), UOp(UOps.VECTORIZE, dtypes.int.vec(2), (gidx0, gidx1+5))), "read_imagef(data0, smp, (int2)(gidx0,(gidx1+5)))") + def test_valid_empty_set(self): + gidx0 = Variable("gidx0", 32) + gidx1 = Variable("gidx1", 32) + shape = (1, 2, 4) + idx = UOp(UOps.VECTORIZE, dtypes.int.vec(2), (gidx0%2, gidx1+2)) + # not empty: gidx0<8 and gidx0>6 still allow gidx0==7 (for integer gidx0), so the gated read is kept + self.assertEqual(render(shape, (gidx0).lt(8) & (-gidx0).lt(-6), idx), + "(((gidx0<8)&((gidx0*(-1))<(-6)))?read_imagef(data0, smp, (int2)((gidx0%2),(gidx1+2))):(float4)(0.0f,0.0f,0.0f,0.0f))") + + # empty: gidx0<8 and gidx0>7 allow no integer value, so render is expected to raise IndexError + self.assertRaises(IndexError, lambda: render(shape, (gidx0).lt(8) & (-gidx0).lt(-7), idx)) + def test_simplify1(self): # idx has the form (A % m, A // m + k) and valid has (c0 < A) and (A < c1) gidx = Variable("gidx", 512) diff --git a/tinygrad/codegen/uopgraph.py b/tinygrad/codegen/uopgraph.py index 665ac8b6f6..0083655624 100644 --- a/tinygrad/codegen/uopgraph.py +++ b/tinygrad/codegen/uopgraph.py @@ -195,6 +195,9 
@@ def simplify_valid_image_load(load:UOp, buf:UOp): lower = -stmt.src[1].arg+1 drop_stmt.append(stmt) + # valid is an empty set (upper < lower): return the load with valid replaced by valid.const_like(False) + if upper < lower: return UOp(UOps.LOAD, load.dtype, (buf, idx, invalid_val, valid.const_like(False))) + new_indx0, new_indx1 = None, None if (L:=(lower * c + d)) // m == (U:=(upper * c + d)) // m: # in the same row if (L % m - c < 0) and (U % m + c >= m): # spans the whole row