move view left to the outer graph prereqs + testing (#10725)

* move view left to the outer graph

* global view right

* don't need that one

* remove comment

* test kernelize

* simple

* split onnx, test sdxl null

* fix testing

* ugh, wrong one

* Update test.yml
This commit is contained in:
George Hotz
2025-06-09 20:43:25 -07:00
committed by GitHub
parent b7198fdcfd
commit acf72872b3
4 changed files with 81 additions and 21 deletions

View File

@@ -352,6 +352,7 @@ jobs:
uses: ./.github/actions/setup-tinygrad
with:
key: unittest-12
pydeps: "pillow"
deps: testing_unit
- name: Test README
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
@@ -359,6 +360,8 @@ jobs:
run: PYTHONPATH="." python -m pytest -n=auto test/unit/ --durations=20
- name: Run targetted tests on NULL backend
run: PYTHONPATH="." NULL=1 python3 test/test_multitensor.py TestMultiTensor.test_data_parallel_resnet_train_step
- name: Run SDXL on NULL backend
run: MAX_BUFFER_SIZE=0 PYTHONPATH="." NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
# TODO: support fake weights
#- name: Run LLaMA 7B on 4 fake devices
# run: NULL=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing
@@ -446,8 +449,39 @@ jobs:
- name: Run process replay tests
uses: ./.github/actions/process-replay
testonnxcpu:
name: 'ONNX (CPU) Tests'
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: onnxoptc
deps: testing
python-version: '3.11'
llvm: 'true'
- name: Test ONNX (CPU)
run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (LLVM)
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test Additional ONNX Ops (CPU)
run: CPU=1 PYTHONPATH=. python3 test/external/external_test_onnx_ops.py
- name: Test Quantize ONNX
run: CPU=1 PYTHONPATH=. python3 test/test_quantize_onnx.py
- name: Run REMOTE=1 Test
run: |
REMOTEDEV=CPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_jit.py test/test_multitensor.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopencl:
name: 'ONNX+Optimization Tests'
name: 'ONNX (GPU)+Optimization Tests'
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
@@ -464,20 +498,10 @@ jobs:
pydeps: "tensorflow==2.15.1 tensorflow_addons"
python-version: '3.11'
opencl: 'true'
llvm: 'true'
- name: Test ONNX (GPU)
run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (CPU)
run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (LLVM)
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test Additional ONNX Ops (CPU)
run: CPU=1 PYTHONPATH=. python3 test/external/external_test_onnx_ops.py
- name: Test Quantize ONNX
run: CPU=1 PYTHONPATH=. python3 test/test_quantize_onnx.py
- name: Run REMOTE=1 Test
run: |
REMOTEDEV=CPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_jit.py test/test_multitensor.py
REMOTEDEV=GPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_image_dtype.py test/test_jit.py
REMOTEDEV=GPU IMAGE=2 REMOTE=1 python3 -m pytest test/test_tiny.py test/test_image_dtype.py
- name: Test Optimization Helpers

View File

@@ -376,6 +376,7 @@ if __name__ == "__main__":
parser.add_argument('--weights', type=str, help="Custom path to weights")
parser.add_argument('--timing', action='store_true', help="Print timing per step")
parser.add_argument('--noshow', action='store_true', help="Don't show the image")
parser.add_argument('--fakeweights', action='store_true', help="Load fake weights")
args = parser.parse_args()
if args.seed is not None:
@@ -383,15 +384,16 @@ if __name__ == "__main__":
model = SDXL(configs["SDXL_Base"])
default_weight_url = 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors'
weights = args.weights if args.weights else fetch(default_weight_url, 'sd_xl_base_1.0.safetensors')
loaded_weights = load_state_dict(model, safe_load(weights), strict=False, verbose=False, realize=False)
if not args.fakeweights:
default_weight_url = 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors'
weights = args.weights if args.weights else fetch(default_weight_url, 'sd_xl_base_1.0.safetensors')
loaded_weights = load_state_dict(model, safe_load(weights), strict=False, verbose=False, realize=False)
start_mem_used = GlobalCounters.mem_used
with Timing("loaded weights in ", lambda et_ns: f", {(B:=(GlobalCounters.mem_used-start_mem_used))/1e9:.2f} GB loaded at {B/et_ns:.2f} GB/s"):
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
Tensor.realize(*loaded_weights)
del loaded_weights
start_mem_used = GlobalCounters.mem_used
with Timing("loaded weights in ", lambda et_ns: f", {(B:=(GlobalCounters.mem_used-start_mem_used))/1e9:.2f} GB loaded at {B/et_ns:.2f} GB/s"):
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
Tensor.realize(*loaded_weights)
del loaded_weights
N = 1
C = 4

View File

@@ -0,0 +1,33 @@
import unittest
from tinygrad import Tensor
from tinygrad.uop import Ops
class TestKernelize(unittest.TestCase):
  # Inspects the UOp graph attached to tensors after Tensor.kernelize() has been called.

  def test_add_reshaped(self):
    # Two reshapes of the same reduce result are built *before* kernelize is called;
    # afterwards both must share the very same source UOp object.
    ones = Tensor.ones(16,16).contiguous()
    zeros = Tensor.zeros(16,16).contiguous()
    row_sums = (ones+zeros).sum(axis=1)
    as_4x4 = row_sums.reshape(4,4)
    as_2x8 = row_sums.reshape(2,8)
    row_sums.kernelize()
    src_of_4x4 = as_4x4.uop.src[0]
    src_of_2x8 = as_2x8.uop.src[0]
    self.assertIs(src_of_4x4, src_of_2x8)

  def test_two_reduce(self):
    # Kernelizing the outer reduce leaves the inner reduce's base UOp as an ASSIGN.
    base = Tensor.ones(16,16).contiguous()
    inner = base.sum(axis=1)
    outer = inner.sum(axis=0)
    outer.kernelize()
    inner_base_op = inner.uop.base.op
    self.assertIs(inner_base_op, Ops.ASSIGN)

  def test_two_reduce_w_add(self):
    # Same as test_two_reduce, but with an elementwise +1 between the two reduces.
    base = Tensor.ones(16,16).contiguous()
    inner = base.sum(axis=1)
    outer = (inner+1).sum(axis=0)
    outer.kernelize()
    inner_base = inner.uop.base
    # NOTE: the +1 is fused with the inner reduce, so the inner reduce is not kernelized
    self.assertIs(inner_base.op, Ops.REDUCE_AXIS)
    # ...but what feeds that REDUCE_AXIS is an ASSIGN
    self.assertIs(inner_base.src[0].base.op, Ops.ASSIGN)

# run the suite when this file is executed directly
if __name__ == '__main__':
  unittest.main()

View File

@@ -50,7 +50,8 @@ def copy_reorder_view(copy:UOp, view:UOp, base:UOp):
if prod(view.shape) < prod(base.shape): return view.contiguous().copy_to_device(copy.device)
return base.copy_to_device(copy.device).view(view.arg)
ALWAYS_CONTIGUOUS = {Ops.CONTIGUOUS, Ops.ASSIGN, Ops.COPY, Ops.BUFFER, Ops.BUFFER_VIEW, Ops.CONST, Ops.BIND, Ops.DEVICE, Ops.MSELECT, Ops.MSTACK}
ALWAYS_CONTIGUOUS = {Ops.CONTIGUOUS, Ops.ASSIGN, Ops.COPY, Ops.BUFFER, Ops.BUFFER_VIEW,
Ops.CONST, Ops.BIND, Ops.DEVICE, Ops.MSELECT, Ops.MSTACK, Ops.GBARRIER}
sym = symbolic_simple+PatternMatcher([
# UOp with size 0 is zero
@@ -356,7 +357,7 @@ view_right = merge_views+PatternMatcher([
# apply view after reduceops
(UPat(Ops.REDUCE_AXIS, src=(UPat(Ops.VIEW, src=(UPat(GroupOp.All-ALWAYS_CONTIGUOUS, name="src"),), name="v"),), name="r"), reduceop_view_right),
# apply view after elementwise ops
(UPat(GroupOp.All-{Ops.SINK}, name="root"), elementwise_view_right),
(UPat(GroupOp.All-{Ops.SINK, Ops.GBARRIER}, name="root"), elementwise_view_right),
# merge axes for double reduce (invert of SPLIT_REDUCEOP=1)
(UPat(Ops.REDUCE_AXIS, src=(UPat(Ops.REDUCE_AXIS, name="r1"),), name="r2"),
lambda r1,r2: r1.replace(arg=(r1.arg[0], r2.arg[1]+r1.arg[1])) if r1.arg[0] is r2.arg[0] else None),