mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 07:28:15 -05:00
move view left to the outer graph prereqs + testing (#10725)
* move view left to the outer graph * global view right * don't need that one * remove comment * test kernelize * simple * split onnx, test sdxl null * fix testing * ugh, wrong one * Update test.yml
This commit is contained in:
46
.github/workflows/test.yml
vendored
46
.github/workflows/test.yml
vendored
@@ -352,6 +352,7 @@ jobs:
|
||||
uses: ./.github/actions/setup-tinygrad
|
||||
with:
|
||||
key: unittest-12
|
||||
pydeps: "pillow"
|
||||
deps: testing_unit
|
||||
- name: Test README
|
||||
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
|
||||
@@ -359,6 +360,8 @@ jobs:
|
||||
run: PYTHONPATH="." python -m pytest -n=auto test/unit/ --durations=20
|
||||
- name: Run targetted tests on NULL backend
|
||||
run: PYTHONPATH="." NULL=1 python3 test/test_multitensor.py TestMultiTensor.test_data_parallel_resnet_train_step
|
||||
- name: Run SDXL on NULL backend
|
||||
run: MAX_BUFFER_SIZE=0 PYTHONPATH="." NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
|
||||
# TODO: support fake weights
|
||||
#- name: Run LLaMA 7B on 4 fake devices
|
||||
# run: NULL=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing
|
||||
@@ -446,8 +449,39 @@ jobs:
|
||||
- name: Run process replay tests
|
||||
uses: ./.github/actions/process-replay
|
||||
|
||||
testonnxcpu:
|
||||
name: 'ONNX (CPU) Tests'
|
||||
runs-on: ubuntu-22.04
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
IGNORE_OOB: 0
|
||||
|
||||
steps:
|
||||
- name: Checkout Code
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Environment
|
||||
uses: ./.github/actions/setup-tinygrad
|
||||
with:
|
||||
key: onnxoptc
|
||||
deps: testing
|
||||
python-version: '3.11'
|
||||
llvm: 'true'
|
||||
- name: Test ONNX (CPU)
|
||||
run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
- name: Test ONNX (LLVM)
|
||||
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
- name: Test Additional ONNX Ops (CPU)
|
||||
run: CPU=1 PYTHONPATH=. python3 test/external/external_test_onnx_ops.py
|
||||
- name: Test Quantize ONNX
|
||||
run: CPU=1 PYTHONPATH=. python3 test/test_quantize_onnx.py
|
||||
- name: Run REMOTE=1 Test
|
||||
run: |
|
||||
REMOTEDEV=CPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_jit.py test/test_multitensor.py
|
||||
- name: Run process replay tests
|
||||
uses: ./.github/actions/process-replay
|
||||
|
||||
testopencl:
|
||||
name: 'ONNX+Optimization Tests'
|
||||
name: 'ONNX (GPU)+Optimization Tests'
|
||||
runs-on: ubuntu-22.04
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
@@ -464,20 +498,10 @@ jobs:
|
||||
pydeps: "tensorflow==2.15.1 tensorflow_addons"
|
||||
python-version: '3.11'
|
||||
opencl: 'true'
|
||||
llvm: 'true'
|
||||
- name: Test ONNX (GPU)
|
||||
run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
- name: Test ONNX (CPU)
|
||||
run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
- name: Test ONNX (LLVM)
|
||||
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
|
||||
- name: Test Additional ONNX Ops (CPU)
|
||||
run: CPU=1 PYTHONPATH=. python3 test/external/external_test_onnx_ops.py
|
||||
- name: Test Quantize ONNX
|
||||
run: CPU=1 PYTHONPATH=. python3 test/test_quantize_onnx.py
|
||||
- name: Run REMOTE=1 Test
|
||||
run: |
|
||||
REMOTEDEV=CPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_jit.py test/test_multitensor.py
|
||||
REMOTEDEV=GPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_image_dtype.py test/test_jit.py
|
||||
REMOTEDEV=GPU IMAGE=2 REMOTE=1 python3 -m pytest test/test_tiny.py test/test_image_dtype.py
|
||||
- name: Test Optimization Helpers
|
||||
|
||||
@@ -376,6 +376,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--weights', type=str, help="Custom path to weights")
|
||||
parser.add_argument('--timing', action='store_true', help="Print timing per step")
|
||||
parser.add_argument('--noshow', action='store_true', help="Don't show the image")
|
||||
parser.add_argument('--fakeweights', action='store_true', help="Load fake weights")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.seed is not None:
|
||||
@@ -383,15 +384,16 @@ if __name__ == "__main__":
|
||||
|
||||
model = SDXL(configs["SDXL_Base"])
|
||||
|
||||
default_weight_url = 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors'
|
||||
weights = args.weights if args.weights else fetch(default_weight_url, 'sd_xl_base_1.0.safetensors')
|
||||
loaded_weights = load_state_dict(model, safe_load(weights), strict=False, verbose=False, realize=False)
|
||||
if not args.fakeweights:
|
||||
default_weight_url = 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors'
|
||||
weights = args.weights if args.weights else fetch(default_weight_url, 'sd_xl_base_1.0.safetensors')
|
||||
loaded_weights = load_state_dict(model, safe_load(weights), strict=False, verbose=False, realize=False)
|
||||
|
||||
start_mem_used = GlobalCounters.mem_used
|
||||
with Timing("loaded weights in ", lambda et_ns: f", {(B:=(GlobalCounters.mem_used-start_mem_used))/1e9:.2f} GB loaded at {B/et_ns:.2f} GB/s"):
|
||||
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
|
||||
Tensor.realize(*loaded_weights)
|
||||
del loaded_weights
|
||||
start_mem_used = GlobalCounters.mem_used
|
||||
with Timing("loaded weights in ", lambda et_ns: f", {(B:=(GlobalCounters.mem_used-start_mem_used))/1e9:.2f} GB loaded at {B/et_ns:.2f} GB/s"):
|
||||
with WallTimeEvent(BenchEvent.LOAD_WEIGHTS):
|
||||
Tensor.realize(*loaded_weights)
|
||||
del loaded_weights
|
||||
|
||||
N = 1
|
||||
C = 4
|
||||
|
||||
33
test/unit/test_kernelize.py
Normal file
33
test/unit/test_kernelize.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import unittest
|
||||
from tinygrad import Tensor
|
||||
from tinygrad.uop import Ops
|
||||
|
||||
class TestKernelize(unittest.TestCase):
  """Checks on the UOp graph attached to tensors after `Tensor.kernelize()` has run."""

  def test_add_reshaped(self):
    """Two reshapes taken before kernelize() must end up sharing one source UOp."""
    ones = Tensor.ones(16,16).contiguous()
    zeros = Tensor.zeros(16,16).contiguous()
    reduced = (ones+zeros).sum(axis=1)
    # both views are created before the reduction is kernelized
    as_4x4 = reduced.reshape(4,4)
    as_2x8 = reduced.reshape(2,8)
    reduced.kernelize()
    src_of_4x4 = as_4x4.uop.src[0]
    src_of_2x8 = as_2x8.uop.src[0]
    self.assertIs(src_of_4x4, src_of_2x8)

  def test_two_reduce(self):
    """Kernelizing the outer reduce leaves the inner reduce with an ASSIGN base."""
    data = Tensor.ones(16,16).contiguous()
    inner = data.sum(axis=1)
    outer = inner.sum(axis=0)
    outer.kernelize()
    inner_base_op = inner.uop.base.op
    self.assertIs(inner_base_op, Ops.ASSIGN)

  def test_two_reduce_w_add(self):
    """With a +1 between the two reduces, the inner reduce keeps a REDUCE_AXIS base."""
    data = Tensor.ones(16,16).contiguous()
    inner = data.sum(axis=1)
    outer = (inner+1).sum(axis=0)
    outer.kernelize()
    inner_base = inner.uop.base
    # NOTE: the +1 is fused with the inner reduce, so the inner reduce is not kernelized
    self.assertIs(inner_base.op, Ops.REDUCE_AXIS)
    # the input to the REDUCE_AXIS is an ASSIGN though
    self.assertIs(inner_base.src[0].base.op, Ops.ASSIGN)
|
||||
|
||||
# run the unittest runner when this file is executed directly as a script
if __name__ == '__main__':
  unittest.main()
|
||||
@@ -50,7 +50,8 @@ def copy_reorder_view(copy:UOp, view:UOp, base:UOp):
|
||||
if prod(view.shape) < prod(base.shape): return view.contiguous().copy_to_device(copy.device)
|
||||
return base.copy_to_device(copy.device).view(view.arg)
|
||||
|
||||
ALWAYS_CONTIGUOUS = {Ops.CONTIGUOUS, Ops.ASSIGN, Ops.COPY, Ops.BUFFER, Ops.BUFFER_VIEW, Ops.CONST, Ops.BIND, Ops.DEVICE, Ops.MSELECT, Ops.MSTACK}
|
||||
ALWAYS_CONTIGUOUS = {Ops.CONTIGUOUS, Ops.ASSIGN, Ops.COPY, Ops.BUFFER, Ops.BUFFER_VIEW,
|
||||
Ops.CONST, Ops.BIND, Ops.DEVICE, Ops.MSELECT, Ops.MSTACK, Ops.GBARRIER}
|
||||
|
||||
sym = symbolic_simple+PatternMatcher([
|
||||
# UOp with size 0 is zero
|
||||
@@ -356,7 +357,7 @@ view_right = merge_views+PatternMatcher([
|
||||
# apply view after reduceops
|
||||
(UPat(Ops.REDUCE_AXIS, src=(UPat(Ops.VIEW, src=(UPat(GroupOp.All-ALWAYS_CONTIGUOUS, name="src"),), name="v"),), name="r"), reduceop_view_right),
|
||||
# apply view after elementwise ops
|
||||
(UPat(GroupOp.All-{Ops.SINK}, name="root"), elementwise_view_right),
|
||||
(UPat(GroupOp.All-{Ops.SINK, Ops.GBARRIER}, name="root"), elementwise_view_right),
|
||||
# merge axes for double reduce (invert of SPLIT_REDUCEOP=1)
|
||||
(UPat(Ops.REDUCE_AXIS, src=(UPat(Ops.REDUCE_AXIS, name="r1"),), name="r2"),
|
||||
lambda r1,r2: r1.replace(arg=(r1.arg[0], r2.arg[1]+r1.arg[1])) if r1.arg[0] is r2.arg[0] else None),
|
||||
|
||||
Reference in New Issue
Block a user