mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
HIP CI that compiles (to RDNA3) but doesn't have to run (#2482)
* hip amd compilation
* gate the test properly
* cleanup unused import
* remove superfluous numpy conversion
* add SpeedyNet tests (f32 [passes] & f16 [fails])
* make CI verbose (error log from hip compiler)
* test the real ops_hip
* Merge branch 'tinygrad:master' into ci/hip-compilation
* fix CI
* cleanup
* really fix CI
* Fix CI Three: the refixening

---------

Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
This commit is contained in:
41
.github/workflows/test.yml
vendored
41
.github/workflows/test.yml
vendored
@@ -276,6 +276,47 @@ jobs:
|
||||
- name: Run WEBGPU Efficientnet
|
||||
run: node test/test_webgpu.js
|
||||
|
||||
# Compile-only HIP job: installs the ROCm/HIP toolchain on a GPU-less runner and
# runs the RDNA3 compilation tests with MOCKHIP=1 (kernels are compiled, never launched).
testhipcompilation:
  name: HIP Compilation Tests
  runs-on: ubuntu-latest
  timeout-minutes: 20

  steps:
  - name: Checkout Code
    uses: actions/checkout@v3
  - name: Set up Python 3.11
    uses: actions/setup-python@v4
    with:
      python-version: 3.11
  - name: Cache python packages
    uses: actions/cache@v3
    with:
      path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
      key: testing-packages-${{ hashFiles('**/setup.py') }}
  - name: Cache downloads
    uses: actions/cache@v3
    with:
      path: ~/.cache/tinygrad/downloads/
      key: downloads-cache-hipcompilation-${{ env.DOWNLOAD_CACHE_VERSION }}
  - name: Install HIP tools
    run: |
      wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
      # ROCm repository for jammy
      sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF'
      deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/debian jammy main
      EOF
      # Prefer packages from the rocm repository over system packages
      echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
      sudo apt update
      sudo apt install --allow-unauthenticated -y rocm-hip-libraries hip-dev
  - name: Install Python Dependencies
    run: pip install -e '.[testing]'
  - name: Test HIP compilation on RDNA3 [gfx1100]
    run: |
      export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/hip/lib
      MOCKHIP=1 HIP=1 python -m pytest -s test/test_hip_rdna3.py
|
||||
tests:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
||||
37
test/test_hip_rdna3.py
Normal file
37
test/test_hip_rdna3.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
import unittest
|
||||
from tinygrad import Tensor, Device
|
||||
from tinygrad.helpers import dtypes
|
||||
from examples.beautiful_mnist import Model as MNIST
|
||||
from examples.hlb_cifar10 import SpeedyResNet
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT != "HIP", reason="testing HIP->rdna3 compilation needs HIP=1")
class TestHIPCompilationRDNA(unittest.TestCase):
  """Smoke tests that full models compile through the HIP backend for RDNA3.

  Calling .numpy() forces realization of the output tensor, which triggers
  kernel compilation (and, without MOCKHIP=1, execution).
  """

  def test_compile_hip_mnist(self):
    """MNIST model forward pass compiles end-to-end (f32)."""
    model = MNIST()

    x = Tensor.rand(512, 1, 28, 28)
    output = model(x)
    output.numpy()

  def test_compile_hip_speedyresnet(self):
    """SpeedyResNet forward pass compiles end-to-end (f32)."""
    W = Tensor.rand(12, 3, 2, 2)
    model = SpeedyResNet(W)

    x = Tensor.rand(512, 3, 32, 32)
    output = model(x)
    output.numpy()

  @unittest.expectedFailure
  def test_compile_hip_speedyresnet_hf(self):
    """SpeedyResNet in float16 — currently fails to compile, kept as expectedFailure."""
    # Save and restore the global default dtype so this test cannot leak
    # float16 into any test that runs after it.
    old_default = Tensor.default_type
    try:
      Tensor.default_type = dtypes.float16

      W = Tensor.rand(12, 3, 2, 2)
      model = SpeedyResNet(W)

      x = Tensor.rand(512, 3, 32, 32)
      output = model(x)
      output.numpy()
    finally:
      Tensor.default_type = old_default

if __name__ == "__main__":
  unittest.main()
|
||||
@@ -5,7 +5,7 @@ from typing import Tuple, List, Any, Dict, cast, Optional, Callable
|
||||
from tinygrad.helpers import DEBUG, getenv, diskcache
|
||||
from tinygrad.device import Compiled, CompiledASTRunner, update_stats
|
||||
from tinygrad.renderer.hip import HIPRenderer
|
||||
from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTransfer, RawBuffer
|
||||
from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTransfer, RawBuffer, RawMallocBuffer
|
||||
from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.shape.symbolic import Variable
|
||||
from tinygrad.jit import JitItem, get_input_replace, get_jit_stats, get_jc_idxs_with_updatable_launch_dims, get_jc_idxs_with_updatable_var_vals, GraphException
|
||||
@@ -24,12 +24,14 @@ class HIPAllocator(LRUAllocator):
|
||||
def _do_free(self, buf): hip.hipFree(buf)
|
||||
def _cached_bufkey(self, size, dtype, device): return (device, size*dtype.itemsize) # Buffers of the same length could be reused, no matter what dtype.
|
||||
|
||||
MOCKHIP = getenv("MOCKHIP") # for CI. don't run kernels, only check if they compile
|
||||
|
||||
# NOTE(review): the rendered diff left both the pre- and post-change lines of
# __init__ in this span; only the post-change MOCKHIP-aware version is kept.
class _HIP:
  """Process-wide HIP state (default device, device count, allocator).

  With MOCKHIP=1 (compile-only CI) no real device is touched: the device count
  is reported as 0 and no allocator is created.
  """
  def __init__(self, device=None):
    self.default_device = device or getenv("HIP_DEFAULT_DEVICE")
    self.device_count = 0 if MOCKHIP else hip.hipGetDeviceCount()
    if not MOCKHIP: hip.hipSetDevice(self.default_device)
    self.allocator = None if MOCKHIP else HIPAllocator(hip.hipGetDeviceProperties(self.default_device).totalGlobalMem)
HIP = _HIP()
|
||||
|
||||
class RawHIPBuffer(RawBufferCopyInOut, RawBufferTransfer):
|
||||
@@ -47,7 +49,8 @@ class RawHIPBuffer(RawBufferCopyInOut, RawBufferTransfer):
|
||||
# NOTE(review): the rendered diff left both the old unconditional gcnArchName
# call and the new MOCKHIP-aware pair in this span; only the new version is kept.
@diskcache
def compile_hip(prg) -> bytes:
  """Compile HIP C++ source `prg` to a code object via hiprtc (disk-cached).

  With MOCKHIP=1 the target arch is hard-coded to gfx1100 (RDNA3) so no real
  device needs to be queried; otherwise the default device's arch is used.
  """
  prog = hip.hiprtcCreateProgram(prg, "<null>", [], [])
  arch = "gfx1100" if MOCKHIP else hip.hipGetDeviceProperties(HIP.default_device).gcnArchName
  hip.hiprtcCompileProgram(prog, [f'--offload-arch={arch}'])
  return hip.hiprtcGetCode(prog)
||||
|
||||
def time_execution(cb, enable=False):
|
||||
@@ -77,6 +80,7 @@ class HIPProgram:
|
||||
self.prgs.append(hip.hipModuleGetFunction(self.modules[-1], name))
|
||||
|
||||
def __call__(self, *args, global_size:Tuple[int,int,int], local_size:Tuple[int,int,int], wait=False):
|
||||
if MOCKHIP: return
|
||||
hip.hipSetDevice(args[0]._device)
|
||||
if self.c_struct_t is None: self.c_struct_t = hip.getCStructForType([(ctypes.c_void_p if not isinstance(x, int) else ctypes.c_int) for x in args])
|
||||
c_params = cast(Callable, self.c_struct_t)(*[x._buf if not isinstance(x, int) else x for x in args])
|
||||
@@ -137,4 +141,4 @@ class HIPGraph:
|
||||
update_stats(f"<batched {len(self.jit_cache)}>", self.op_estimate, self.mem_estimate, var_vals, et, buf_count=len(input_rawbuffers), jit=jit, num_kernels=len(self.jit_cache))
|
||||
return et
|
||||
|
||||
# With MOCKHIP=1 there is no HIP device to allocate on, so plain host memory
# (RawMallocBuffer) stands in for RawHIPBuffer. (The rendered diff left both the
# old and new assignment in this span; only the post-change line is kept.)
HIPDevice = Compiled(RawHIPBuffer if not MOCKHIP else RawMallocBuffer, LinearizerOptions(device="HIP"), HIPRenderer, compile_hip, HIPProgram, hip.hipDeviceSynchronize, graph=HIPGraph)
|
||||
Reference in New Issue
Block a user