From d275ff930ac2567d58ebbec535a3c919b90190e1 Mon Sep 17 00:00:00 2001 From: Davi Silva Date: Tue, 28 Nov 2023 11:33:11 +0700 Subject: [PATCH] HIP compilation on CI targeting RDNA3 (#2459) * hip amd compilation * gate the test properly * cleanup unused import * remove superfluous numpy conversion * add SpeedyNet tests (f32 [passes] & f16 [fails]) * make CI verbose (error log from hip compiler) * test the real ops_hip * Merge branch 'tinygrad:master' into ci/hip-compilation * fix CI * cleanup * really fix CI --- .github/workflows/test.yml | 41 +++++++++++++++++++++++++++++++++++++ test/test_hip_rdna3.py | 37 +++++++++++++++++++++++++++++++++ tinygrad/runtime/ops_hip.py | 16 ++++++++------- 3 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 test/test_hip_rdna3.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a406efda70..f4f34de990 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -276,6 +276,47 @@ jobs: - name: Run WEBGPU Efficientnet run: node test/test_webgpu.js + testhipcompilation: + name: HIP Compilation Tests + runs-on: ubuntu-latest + timeout-minutes: 20 + + steps: + - name: Checkout Code + uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Cache python packages + uses: actions/cache@v3 + with: + path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages + key: testing-packages-${{ hashFiles('**/setup.py') }} + - name: Cache downloads + uses: actions/cache@v3 + with: + path: ~/.cache/tinygrad/downloads/ + key: downloads-cache-hipcompilation-${{ env.DOWNLOAD_CACHE_VERSION }} + - name: Install HIP tools + run: | + wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + # ROCm repository for jammy + sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF' + deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/debian jammy main + EOF + # Prefer packages from the rocm repository over system packages + echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 + sudo apt update + sudo apt install --allow-unauthenticated -y rocm-hip-libraries hip-dev + - name: Install Python Dependencies + run: pip install -e '.[testing]' + - name: Test HIP compilation on RDNA3 [gfx1100] + run: | + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/hip/lib + CI=1 python -m pytest -s test/test_hip_rdna3.py + + tests: strategy: fail-fast: false diff --git a/test/test_hip_rdna3.py b/test/test_hip_rdna3.py new file mode 100644 index 0000000000..4ca29edc6e --- /dev/null +++ b/test/test_hip_rdna3.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +import unittest +from tinygrad import Tensor, Device +from tinygrad.helpers import dtypes +from examples.beautiful_mnist import Model as MNIST +from examples.hlb_cifar10 import SpeedyResNet + +@unittest.skipIf(Device.DEFAULT != "HIP", reason="testing HIP->rdna3 compilation needs HIP=1") +class TestHIPCompilationRDNA(unittest.TestCase): + def test_compile_hip_mnist(self): + model = MNIST() + + input = Tensor.rand(512,1,28,28) + output = model(input) + output.numpy() + + def test_compile_hip_speedyresnet(self): + W = Tensor.rand(12,3,2,2) + model = SpeedyResNet(W) + + input = Tensor.rand(512, 3, 32, 32) + output = model(input) + output.numpy() + + @unittest.expectedFailure + def test_compile_hip_speedyresnet_hf(self): + Tensor.default_type = dtypes.float16 + + W = Tensor.rand(12,3,2,2) + model = SpeedyResNet(W) + + input = Tensor.rand(512, 3, 32, 32) + output = model(input) + output.numpy() + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tinygrad/runtime/ops_hip.py b/tinygrad/runtime/ops_hip.py index 653668dc73..9ac2ecc6e8 100644 --- a/tinygrad/runtime/ops_hip.py +++ b/tinygrad/runtime/ops_hip.py @@ -2,10 +2,10 @@ import numpy as np import ctypes import extra.hip_wrapper as hip from typing import Tuple, List, Any, Dict, cast, Optional, Callable -from tinygrad.helpers import DEBUG, getenv, diskcache +from tinygrad.helpers import CI, DEBUG, getenv, diskcache from tinygrad.device import Compiled, CompiledASTRunner, update_stats from tinygrad.renderer.hip import HIPRenderer -from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTransfer, RawBuffer +from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTransfer, RawBuffer, RawMallocBuffer from tinygrad.codegen.kernel import LinearizerOptions from tinygrad.shape.symbolic import Variable from tinygrad.jit import JitItem, get_input_replace, get_jit_stats, get_jc_idxs_with_updatable_launch_dims, get_jc_idxs_with_updatable_var_vals, GraphException @@ -27,9 +27,9 @@ class HIPAllocator(LRUAllocator): class _HIP: def __init__(self, device=None): self.default_device = device or getenv("HIP_DEFAULT_DEVICE") - hip.hipSetDevice(self.default_device) - self.device_count = hip.hipGetDeviceCount() - self.allocator = HIPAllocator(hip.hipGetDeviceProperties(self.default_device).totalGlobalMem) + self.device_count = 0 if CI else hip.hipGetDeviceCount() + if not CI: hip.hipSetDevice(self.default_device) + self.allocator = None if CI else HIPAllocator(hip.hipGetDeviceProperties(self.default_device).totalGlobalMem) HIP = _HIP() class RawHIPBuffer(RawBufferCopyInOut, RawBufferTransfer): @@ -47,7 +47,8 @@ class RawHIPBuffer(RawBufferCopyInOut, RawBufferTransfer): @diskcache def compile_hip(prg) -> bytes: prog = hip.hiprtcCreateProgram(prg, "", [], []) - hip.hiprtcCompileProgram(prog, [f'--offload-arch={hip.hipGetDeviceProperties(HIP.default_device).gcnArchName}']) + arch = "gfx1100" if CI else hip.hipGetDeviceProperties(HIP.default_device).gcnArchName + hip.hiprtcCompileProgram(prog, [f'--offload-arch={arch}']) return hip.hiprtcGetCode(prog) def time_execution(cb, enable=False): @@ -77,6 +78,7 @@ class HIPProgram: self.prgs.append(hip.hipModuleGetFunction(self.modules[-1], name)) def __call__(self, *args, global_size:Tuple[int,int,int], local_size:Tuple[int,int,int], wait=False): + if CI: return hip.hipSetDevice(args[0]._device) if self.c_struct_t is None: self.c_struct_t = hip.getCStructForType([(ctypes.c_void_p if not isinstance(x, int) else ctypes.c_int) for x in args]) c_params = cast(Callable, self.c_struct_t)(*[x._buf if not isinstance(x, int) else x for x in args]) @@ -137,4 +139,4 @@ class HIPGraph: update_stats(f"", self.op_estimate, self.mem_estimate, var_vals, et, buf_count=len(input_rawbuffers), jit=jit, num_kernels=len(self.jit_cache)) return et -HIPBuffer = Compiled(RawHIPBuffer, LinearizerOptions(device="HIP"), HIPRenderer, compile_hip, HIPProgram, hip.hipDeviceSynchronize, graph=HIPGraph) +HIPBuffer = Compiled(RawHIPBuffer if not CI else RawMallocBuffer, LinearizerOptions(device="HIP"), HIPRenderer, compile_hip, HIPProgram, hip.hipDeviceSynchronize, graph=HIPGraph) \ No newline at end of file