From 5986d656a284eedadf2eaca5488296c636b6e189 Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 9 Oct 2025 09:22:54 +0800 Subject: [PATCH] tighter ASSERT_MIN_STEP_TIME (#12531) set to about 1.2x of actual time now --- .github/workflows/benchmark.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 7f1adf2f0d..b0ce605f94 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -52,12 +52,12 @@ jobs: - name: reset process replay run: python3.11 test/external/process_replay/reset.py - name: Run Stable Diffusion - run: BENCHMARK_LOG=stable_diffusion JIT=1 ASSERT_MIN_STEP_TIME=1000 python3.11 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt + run: BENCHMARK_LOG=stable_diffusion JIT=1 ASSERT_MIN_STEP_TIME=800 python3.11 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt - name: Run Stable Diffusion without fp16 - run: BENCHMARK_LOG=stable_diffusion_fp32 JIT=1 ASSERT_MIN_STEP_TIME=1000 python3.11 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd_no_fp16.txt + run: BENCHMARK_LOG=stable_diffusion_fp32 JIT=1 ASSERT_MIN_STEP_TIME=900 python3.11 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd_no_fp16.txt - name: Run Stable Diffusion v2 # TODO: very slow step time - run: BENCHMARK_LOG=stable_diffusion_v2 JIT=1 ASSERT_MIN_STEP_TIME=100000 python3.11 examples/sdv2.py --fp16 --seed 0 --noshow --timing | tee sdv2.txt + run: BENCHMARK_LOG=stable_diffusion_v2 JIT=1 ASSERT_MIN_STEP_TIME=10000 python3.11 examples/sdv2.py --fp16 --seed 0 --noshow --timing | tee sdv2.txt # process replay can't capture this, the graph is too large # TODO: too slow # - name: Run SDXL @@ -101,7 +101,7 @@ jobs: - name: Run GPT2 run: | BENCHMARK_LOG=gpt2_nojit JIT=0 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt - BENCHMARK_LOG=gpt2 JIT=1 ASSERT_MIN_STEP_TIME=16 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt + BENCHMARK_LOG=gpt2 JIT=1 ASSERT_MIN_STEP_TIME=13 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt - name: Run GPT2 w HALF run: BENCHMARK_LOG=gpt2_half HALF=1 python3.11 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt - name: Run GPT2 w HALF/BEAM @@ -246,9 +246,9 @@ jobs: - name: Run GPT2 run: | BENCHMARK_LOG=gpt2_nojit NV=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt - BENCHMARK_LOG=gpt2 NV=1 JIT=1 ASSERT_MIN_STEP_TIME=10 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt + BENCHMARK_LOG=gpt2 NV=1 JIT=1 ASSERT_MIN_STEP_TIME=4 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt - name: Run GPT2 w HALF - run: BENCHMARK_LOG=gpt2_half NV=1 HALF=1 ASSERT_MIN_STEP_TIME=10 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt + run: BENCHMARK_LOG=gpt2_half NV=1 HALF=1 ASSERT_MIN_STEP_TIME=6 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt - name: Run GPT2 w HALF/BEAM run: BENCHMARK_LOG=gpt2_half_beam NV=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt - uses: actions/upload-artifact@v4 @@ -316,11 +316,11 @@ jobs: - name: Train MNIST run: time PYTHONPATH=. NV=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt - name: Run 10 CIFAR training steps - run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=850 NV=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt + run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=270 NV=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt - name: Run 10 CIFAR training steps w HALF - run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=680 NV=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt + run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=310 NV=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt - name: Run 10 CIFAR training steps w BF16 - run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=750 NV=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt + run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=310 NV=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt # TODO: too slow # - name: Run 10 CIFAR training steps w winograd # run: BENCHMARK_LOG=cifar_10steps_half_wino ASSERT_MIN_STEP_TIME=350 NV=1 CAPTURE_PROCESS_REPLAY=0 WINO=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt @@ -426,7 +426,7 @@ jobs: - name: Test AM warm start time run: time AMD=1 python3 test/test_tiny.py TestTiny.test_plus - name: Run Stable Diffusion - run: BENCHMARK_LOG=stable_diffusion ASSERT_MIN_STEP_TIME=900 AMD=1 python3 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt + run: BENCHMARK_LOG=stable_diffusion ASSERT_MIN_STEP_TIME=550 AMD=1 python3 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt # TODO: too slow # - name: Run SDXL # run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=3200 CAPTURE_PROCESS_REPLAY=0 AMD=1 python3 examples/sdxl.py --seed 0 --noshow --timing | tee sdxl.txt @@ -520,9 +520,9 @@ jobs: - name: Train MNIST run: time PYTHONPATH=. AMD=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt - name: Run 10 CIFAR training steps - run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=400 AMD=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt + run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=330 AMD=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt - name: Run 10 CIFAR training steps w HALF - run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=500 AMD=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt + run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=330 AMD=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt # - name: Run 10 CIFAR training steps w BF16 # run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=288 AMD=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt # TODO: too slow