# Mirror of https://github.com/tinygrad/tinygrad.git
# Synced 2026-01-09 15:08:02 -05:00
# Last commit:
#   * Don't use numpy inside hlb_cifar10 training loop
#   * Lint it
#   * jit it
#   * Drop the last half-batch
#   * Use gather for random_crop and reuse perms
#   * Wrap train_cifar in FUSE_ARANGE context
#   * No need to pass FUSE_ARANGE=1 to hlb_cifar10.py
#   * Add cutmix to jittable augmentations
#   * Remove .contiguous() from fetch_batches
#   * Fix indexing boundary
#   Co-authored-by: Irwin1138 <irwin1139@gmail.com>
# 34 lines, 1.3 KiB, YAML
# GitHub Actions workflow: runs the SDXL and winograd CIFAR examples with
# JITBEAM kernel search enabled, once with the beam/compiler caches bypassed
# ("new search") and once without those overrides ("cached search").
name: Benchmark with kernel search

# Runs on pushes to the dedicated `update_benchmark_search` branch, or when
# started manually from the Actions UI (workflow_dispatch).
on:
  push:
    branches:
      - update_benchmark_search
  workflow_dispatch:

jobs:
  run_script_job:
    # Needs a self-hosted runner carrying all three labels.
    runs-on: [self-hosted, Linux, tinybox]
    # Skip the job on forks: only run when the repository owner is `tinygrad`.
    if: github.repository_owner == 'tinygrad'

    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
      - name: Remove amdgpu
        # `|| true` keeps the step green when rmmod fails
        # (e.g. the module is not currently loaded).
        run: sudo rmmod amdgpu || true
      - name: Cleanup running AM processes
        # NOTE(review): presumably kills leftover processes reported by
        # am_smi.py before benchmarking - confirm against the script.
        run: python extra/amdpci/am_smi.py --pids --kill
      - name: Run SDXL with new search
        # TODO: GCVM_L2_PROTECTION_FAULT_STATUS with llvm19
        # IGNORE_BEAM_CACHE=1 and DISABLE_COMPILER_CACHE=1 are set only for
        # the "new search" runs; the "cached" runs below omit them.
        run: |
          BENCHMARK_LOG=search_sdxl PYTHONPATH=. AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 python examples/sdxl.py --noshow --timing --seed 0
      - name: Run SDXL with cached search
        run: |
          BENCHMARK_LOG=search_sdxl_cached PYTHONPATH=. AMD=1 JITBEAM=2 python examples/sdxl.py --noshow --timing --seed 0
      - name: Run winograd cifar with new search
        run: |
          BENCHMARK_LOG=search_wino_cifar WINO=1 DEFAULT_FLOAT=HALF JITBEAM=4 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BS=1024 STEPS=500 python examples/hlb_cifar10.py
      - name: Run winograd cifar with cached search
        run: |
          BENCHMARK_LOG=search_wino_cifar_cached WINO=1 DEFAULT_FLOAT=HALF JITBEAM=4 BS=1024 STEPS=500 python examples/hlb_cifar10.py