diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ab3ebe27e7..088879671c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,7 +22,7 @@ jobs: - name: Symlink models and datasets run: | mkdir -p weights - ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu + ln -s ~/tinygrad/extra/disassemblers/applegpu extra/disassemblers/applegpu ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz ln -s ~/tinygrad/weights/LLaMA weights/LLaMA diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5064f1062a..18a01d085c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -167,17 +167,17 @@ jobs: - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot model compile and size run: | - DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py + DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py #python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000' - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot model correctness (float32) - run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py + run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot alt model correctness (float32) - run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx + run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot fastvits model correctness (float32) - run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx + run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx - if: ${{ matrix.task == 'onnx' }} name: Test ONNX (GPU) run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 diff --git a/.gitignore b/.gitignore index a897885df0..e4b26b4071 100644 --- a/.gitignore +++ b/.gitignore @@ -18,9 +18,8 @@ pandecode.dump vertex.bin recognize* .idea -disassemblers/applegpu -disassemblers/cuda_ioctl_sniffer *.prof +extra/disassemblers/applegpu extra/datasets/cifar-10-python.tar.gz extra/datasets/librispeech/ extra/datasets/imagenet/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f254c7c73b..a1bc8fcbd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: hooks: - id: whitespace name: strip whitespace - entry: ./strip_whitespace.sh + entry: find tinygrad -type f -name "*.py" -exec sed -i '' 's/ *$//' '{}' ';' language: system always_run: true pass_filenames: false diff --git a/LICENSE b/LICENSE index 7b2f163930..d42cf56fcd 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2023 George Hotz +Copyright (c) 2024, the tiny corp Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index 98fbde3762..d3e74900be 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@
- - tiny corp logo + tiny corp logo tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) and [karpathy/micrograd](https://github.com/karpathy/micrograd). Maintained by [tiny corp](https://tinygrad.org). diff --git a/openpilot/compile2.py b/examples/openpilot/compile2.py similarity index 100% rename from openpilot/compile2.py rename to examples/openpilot/compile2.py diff --git a/examples/openpilot/go.sh b/examples/openpilot/go.sh new file mode 100755 index 0000000000..dbd17a5e96 --- /dev/null +++ b/examples/openpilot/go.sh @@ -0,0 +1,2 @@ +#!/bin/bash +NOLOCALS=1 FLOAT16=1 DEBUGCL=1 IMAGE=2 GPU=1 python3 examples/openpilot/compile2.py diff --git a/disassemblers/adreno/.gitignore b/extra/disassemblers/adreno/.gitignore similarity index 100% rename from disassemblers/adreno/.gitignore rename to extra/disassemblers/adreno/.gitignore diff --git a/disassemblers/adreno/README b/extra/disassemblers/adreno/README similarity index 100% rename from disassemblers/adreno/README rename to extra/disassemblers/adreno/README diff --git a/disassemblers/adreno/__init__.py b/extra/disassemblers/adreno/__init__.py similarity index 100% rename from disassemblers/adreno/__init__.py rename to extra/disassemblers/adreno/__init__.py diff --git a/disassemblers/adreno/disasm-a3xx.c b/extra/disassemblers/adreno/disasm-a3xx.c similarity index 100% rename from disassemblers/adreno/disasm-a3xx.c rename to extra/disassemblers/adreno/disasm-a3xx.c diff --git a/disassemblers/adreno/instr-a3xx.h b/extra/disassemblers/adreno/instr-a3xx.h similarity index 100% rename from disassemblers/adreno/instr-a3xx.h rename to extra/disassemblers/adreno/instr-a3xx.h diff --git a/disassemblers/adreno/ir3.h b/extra/disassemblers/adreno/ir3.h similarity index 100% rename from disassemblers/adreno/ir3.h rename to extra/disassemblers/adreno/ir3.h diff --git a/disassemblers/adreno/shader_enums.h b/extra/disassemblers/adreno/shader_enums.h similarity index 100% rename from disassemblers/adreno/shader_enums.h rename to extra/disassemblers/adreno/shader_enums.h diff --git a/disassemblers/adreno/util/bitscan.h b/extra/disassemblers/adreno/util/bitscan.h similarity index 100% rename from disassemblers/adreno/util/bitscan.h rename to extra/disassemblers/adreno/util/bitscan.h diff --git a/disassemblers/adreno/util/bitset.h b/extra/disassemblers/adreno/util/bitset.h similarity index 100% rename from disassemblers/adreno/util/bitset.h rename to extra/disassemblers/adreno/util/bitset.h diff --git a/disassemblers/adreno/util/list.h b/extra/disassemblers/adreno/util/list.h similarity index 100% rename from disassemblers/adreno/util/list.h rename to extra/disassemblers/adreno/util/list.h diff --git a/disassemblers/adreno/util/macros.h b/extra/disassemblers/adreno/util/macros.h similarity index 100% rename from disassemblers/adreno/util/macros.h rename to extra/disassemblers/adreno/util/macros.h diff --git a/extra/optimization/generate_dataset.sh b/extra/optimization/generate_dataset.sh index cd4b275cb3..dde903bad8 100755 --- a/extra/optimization/generate_dataset.sh +++ b/extra/optimization/generate_dataset.sh @@ -17,7 +17,7 @@ python3 examples/beautiful_mnist.py python3 examples/beautiful_cartpole.py python3 examples/mlperf/model_spec.py python3 examples/yolov8.py ./test/models/efficientnet/Chicken.jpg -openpilot/go.sh +examples/openpilot/go.sh BIG=1 MPS=1 pytest test/ --ignore=test/test_fusion_op.py --ignore=test/test_linearizer_failures.py # sort and uniq diff --git a/extra/qcom_gpu_driver/opencl_ioctl.py b/extra/qcom_gpu_driver/opencl_ioctl.py index 32255f22bb..26f32bba7c 100644 --- a/extra/qcom_gpu_driver/opencl_ioctl.py +++ b/extra/qcom_gpu_driver/opencl_ioctl.py @@ -72,7 +72,7 @@ def parse_cmd_buf(dat): num_unit = vals[0]>>22 print(f"{num_unit=} {state_block=} {state_src=} {state_type=} {dst_off=}") - from disassemblers.adreno import disasm_raw + from extra.disassemblers.adreno import disasm_raw if state_type == ST6_SHADER: disasm_raw(get_mem(((vals[2] << 32) | vals[1]), 0x180)) if state_type == ST6_CONSTANTS: hexdump(get_mem(((vals[2] << 32) | vals[1]), min(0x180, num_unit*4))) pass diff --git a/openpilot/go.sh b/openpilot/go.sh deleted file mode 100755 index d99c706e77..0000000000 --- a/openpilot/go.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -NOLOCALS=1 FLOAT16=1 DEBUGCL=1 IMAGE=2 GPU=1 python3 openpilot/compile2.py diff --git a/push_pypi.sh b/push_pypi.sh deleted file mode 100755 index bf0f4030c3..0000000000 --- a/push_pypi.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e -rm -rf dist -ipython3 setup.py sdist bdist_wheel -twine upload dist/* - diff --git a/ruff.toml b/ruff.toml index 9a97914bfc..b0a7913a43 100644 --- a/ruff.toml +++ b/ruff.toml @@ -26,12 +26,10 @@ lint.select = [ line-length = 150 exclude = [ - "disassemblers/", "docs/", "docs-legacy/", "examples/", "extra/", - "openpilot/", "tinygrad/runtime/autogen", "test/external/mlperf_resnet", "test/external/mlperf_unet3d", diff --git a/run_multibackend.sh b/run_multibackend.sh deleted file mode 100755 index e419a0f48d..0000000000 --- a/run_multibackend.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -e -echo "********* CPU *********" -CPU=1 python3 "$@" -echo "********* GPU *********" -GPU=1 python3 "$@" -echo "********* METAL *********" -METAL=1 python3 "$@" -echo "********* CLANG *********" -CLANG=1 python3 "$@" -echo "********* LLVM *********" -LLVM=1 python3 "$@" -echo "********* TORCH *********" -TORCH=1 python3 "$@" diff --git a/strip_whitespace.sh b/strip_whitespace.sh deleted file mode 100755 index 0fae2c9761..0000000000 --- a/strip_whitespace.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -find tinygrad -type f -name "*.py" -exec sed -i '' 's/ *$//' '{}' ';' diff --git a/test/external/external_multi_gpu.py b/test/external/external_multi_gpu.py index 4721ac845a..00c02b41cb 100644 --- a/test/external/external_multi_gpu.py +++ b/test/external/external_multi_gpu.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# cd disassemblers/ && git clone --recursive github.com:geohot/cuda_ioctl_sniffer.git -# LD_PRELOAD=$PWD/disassemblers/cuda_ioctl_sniffer/out/sniff.so GPU=1 python3 test/external/external_multi_gpu.py +# cd extra/disassemblers/ && git clone --recursive github.com:geohot/cuda_ioctl_sniffer.git +# LD_PRELOAD=$PWD/extra/disassemblers/cuda_ioctl_sniffer/out/sniff.so GPU=1 python3 test/external/external_multi_gpu.py import numpy as np from tinygrad.tensor import Tensor from tinygrad.helpers import colored, Timing, getenv diff --git a/test/test_schedule.py b/test/test_schedule.py index bdfa32c838..74aefad7f6 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -790,7 +790,7 @@ class TestSchedule(unittest.TestCase): a = Tensor.rand(3, 4, 5).realize() out = a.log2().pad(((0, 1), (0, 1), (0, 1)), 1.0).sum().contiguous() run_schedule(check_schedule(out, 2)) - np.testing.assert_allclose(out.numpy(), np.pad(np.log2(a.numpy()), ((0, 1), (0, 1), (0, 1)), constant_values=1.0).sum()) + np.testing.assert_allclose(out.numpy(), np.pad(np.log2(a.numpy()), ((0, 1), (0, 1), (0, 1)), constant_values=1.0).sum(), rtol=1e-6) def test_shrink_pad_safe(self): a = Tensor.ones((3, )).contiguous().realize() diff --git a/tinygrad/runtime/ops_metal.py b/tinygrad/runtime/ops_metal.py index e690941b42..c7702b86cc 100644 --- a/tinygrad/runtime/ops_metal.py +++ b/tinygrad/runtime/ops_metal.py @@ -33,7 +33,7 @@ class MetalProgram: with tempfile.NamedTemporaryFile(delete=True) as shader: shader.write(lib) shader.flush() - os.system(f"cd {pathlib.Path(__file__).parents[2]}/disassemblers/applegpu && python3 compiler_explorer.py {shader.name}") + os.system(f"cd {pathlib.Path(__file__).parents[2]}/extra/disassemblers/applegpu && python3 compiler_explorer.py {shader.name}") assert lib[:4] == b"MTLB", "Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1." data = libdispatch.dispatch_data_create(lib, len(lib), None, None) self.library = unwrap2(self.device.device.newLibraryWithData_error_(data, None))