mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
process replay diffs 3 things now (#5731)
* github api infra * process replay is 3 parts now * parse benchmarks * add gh_token * complete diff * move process replay tests * last successful run * add tempdir * skip master
This commit is contained in:
26
.github/workflows/benchmark.yml
vendored
26
.github/workflows/benchmark.yml
vendored
@@ -3,6 +3,7 @@ env:
|
||||
RUN_PROCESS_REPLAY: "1"
|
||||
ASSERT_PROCESS_REPLAY: "0"
|
||||
PYTHONPATH: .
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -90,9 +91,6 @@ jobs:
|
||||
# run: STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt
|
||||
- name: Run 10 CIFAR training steps w winograd
|
||||
run: JIT=2 WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt
|
||||
- name: Run process replay tests
|
||||
if: env.RUN_PROCESS_REPLAY == '1'
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Speed (Mac)
|
||||
@@ -119,6 +117,8 @@ jobs:
|
||||
train_cifar_half.txt
|
||||
train_cifar_bf16.txt
|
||||
train_cifar_wino.txt
|
||||
- name: Run process replay tests
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
|
||||
testnvidiabenchmark:
|
||||
name: tinybox green Benchmark
|
||||
@@ -200,9 +200,6 @@ jobs:
|
||||
run: NV=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt
|
||||
- name: Run GPT2 w HALF/BEAM
|
||||
run: NV=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAST_BEFORE_VIEW=0 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt
|
||||
- name: Run process replay tests
|
||||
if: env.RUN_PROCESS_REPLAY == '1'
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Speed (NVIDIA)
|
||||
@@ -229,6 +226,8 @@ jobs:
|
||||
gpt2_jitted.txt
|
||||
gpt2_half.txt
|
||||
gpt2_half_beam.txt
|
||||
- name: Run process replay tests
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
|
||||
testmorenvidiabenchmark:
|
||||
name: tinybox green Training Benchmark
|
||||
@@ -275,9 +274,6 @@ jobs:
|
||||
run: NV=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet_one_gpu.txt
|
||||
- name: Run 10 MLPerf ResNet50 training steps (6 gpu)
|
||||
run: NV=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=1536 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet.txt
|
||||
- name: Run process replay tests
|
||||
if: env.RUN_PROCESS_REPLAY == '1'
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Speed (NVIDIA Training)
|
||||
@@ -291,6 +287,8 @@ jobs:
|
||||
train_resnet.txt
|
||||
train_resnet_one_gpu.txt
|
||||
train_cifar_six_gpu.txt
|
||||
- name: Run process replay tests
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
|
||||
testamdbenchmark:
|
||||
name: tinybox red Benchmark
|
||||
@@ -368,9 +366,6 @@ jobs:
|
||||
run: AMD=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt
|
||||
- name: Run GPT2 w HALF/BEAM
|
||||
run: AMD=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAST_BEFORE_VIEW=0 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt
|
||||
- name: Run process replay tests
|
||||
if: env.RUN_PROCESS_REPLAY == '1'
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Speed (AMD)
|
||||
@@ -395,6 +390,8 @@ jobs:
|
||||
sd.txt
|
||||
sdxl.txt
|
||||
mixtral.txt
|
||||
- name: Run process replay tests
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
|
||||
testmoreamdbenchmark:
|
||||
name: tinybox red Training Benchmark
|
||||
@@ -441,9 +438,6 @@ jobs:
|
||||
run: AMD=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet_one_gpu.txt
|
||||
- name: Run 10 MLPerf ResNet50 training steps (6 gpu)
|
||||
run: AMD=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=1536 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet.txt
|
||||
- name: Run process replay tests
|
||||
if: env.RUN_PROCESS_REPLAY == '1'
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Speed (AMD Training)
|
||||
@@ -457,3 +451,5 @@ jobs:
|
||||
train_resnet.txt
|
||||
train_resnet_one_gpu.txt
|
||||
train_cifar_six_gpu.txt
|
||||
- name: Run process replay tests
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
|
||||
3
.github/workflows/test.yml
vendored
3
.github/workflows/test.yml
vendored
@@ -3,6 +3,7 @@ env:
|
||||
# increment this when downloads substantially change to avoid the internet
|
||||
DOWNLOAD_CACHE_VERSION: '5'
|
||||
RUN_PROCESS_REPLAY: 1
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -508,7 +509,7 @@ jobs:
|
||||
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
|
||||
- name: Run process replay tests
|
||||
run: |
|
||||
if [ "${{ matrix.backend }}" == "amd" ]; then
|
||||
if [ "${{ matrix.backend }}" == "amd" ] && [ "${GITHUB_REF_NAME}" != "master" ]; then
|
||||
MAX_DIFF_PCT=1 RUN_PROCESS_REPLAY=0 test/external/process_replay/test_process_replay.sh
|
||||
fi
|
||||
export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
|
||||
|
||||
Reference in New Issue
Block a user