diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ee914c224a..13ef4ec0e1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -270,9 +270,9 @@ jobs:
     - name: Run targetted tests on NULL backend
       run: NULL=1 python3 -m unittest test.test_multitensor.TestMultiTensor.test_data_parallel_resnet_train_step test/device/test_null.py
     - name: Run SDXL on NULL backend
-      run: MAX_BUFFER_SIZE=0 NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
+      run: NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
     - name: Run Clip tests for SD MLPerf on NULL backend
-      run: MAX_BUFFER_SIZE=0 NULL=1 python -m pytest -n=auto test/external/mlperf_stable_diffusion/external_test_models.py::TestOpenClip --durations=20
+      run: NULL=1 python -m pytest -n=auto test/external/mlperf_stable_diffusion/external_test_models.py::TestOpenClip --durations=20
     # TODO: support fake weights
     #- name: Run LLaMA 7B on 4 fake devices
     #  run: NULL=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing
@@ -453,11 +453,11 @@ jobs:
     - name: Test MLPerf stuff
      run: CL=1 python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20
     - name: NULL=1 beautiful_mnist_multigpu
-      run: MAX_BUFFER_SIZE=0 NULL=1 python examples/beautiful_mnist_multigpu.py
+      run: NULL=1 python examples/beautiful_mnist_multigpu.py
     - name: Test Bert training
-      run: MAX_BUFFER_SIZE=0 NULL=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py
+      run: NULL=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py
     - name: Test llama 3 training
-      run: MAX_BUFFER_SIZE=0 NULL=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py
+      run: NULL=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py
     - name: Run process replay tests
       uses: ./.github/actions/process-replay
diff --git a/tinygrad/device.py b/tinygrad/device.py
index c099ac6998..3452e50fb3 100644
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@@ -125,7 +125,7 @@ class Buffer:
   def allocate(self, opaque=None, external_ptr=None) -> Buffer:
     assert not self.is_initialized(), "can't allocate already allocated buffer"
     if DEBUG >= 7: print(f"buffer: allocate {self.nbytes} bytes on {self.device}")
-    if MAX_BUFFER_SIZE > 0 and self.size > MAX_BUFFER_SIZE: raise RuntimeError(f"buffer of size {self.size/1e6:.2f}M is too large")
+    if not self.device.startswith("NULL") and self.size > MAX_BUFFER_SIZE > 0: raise RuntimeError(f"buffer of size {self.size/1e6:.2f}M is too large")
     self.allocator:Allocator = Device[self.device].allocator
     if external_ptr is not None: self.options = replace(self.options, external_ptr=external_ptr) if self.options else BufferSpec(external_ptr=external_ptr)
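
The rewritten guard in Buffer.allocate exempts NULL devices from the buffer size limit, which is why the workflow steps above can drop MAX_BUFFER_SIZE=0. Below is a minimal, self-contained sketch of how that condition behaves; should_reject is a hypothetical stand-alone helper with illustrative parameters, not tinygrad's real Buffer API.

# Sketch of the new allocation guard (hypothetical helper mirroring the condition in the diff above).
def should_reject(device: str, size: int, max_buffer_size: int) -> bool:
  # NULL devices are never size-limited; otherwise the chained comparison requires both
  # size > max_buffer_size AND max_buffer_size > 0 (a limit of 0 still means "unchecked").
  return not device.startswith("NULL") and size > max_buffer_size > 0

assert should_reject("CL", 2_000_000, 1_000_000)        # real device over the limit -> allocate() would raise
assert not should_reject("CL", 2_000_000, 0)            # limit of 0 -> check disabled
assert not should_reject("NULL", 2_000_000, 1_000_000)  # NULL backend -> always allowed
assert not should_reject("NULL:1", 2_000_000, 1)        # numbered NULL devices match the prefix too

The chained comparison keeps the old "limit of 0 disables the check" behaviour while adding the NULL exemption, so fake-device CI runs no longer need to override the limit via the environment.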