From 9b02aef45a43c97655260299fdcf20fb92fb785e Mon Sep 17 00:00:00 2001
From: nimlgen <138685161+nimlgen@users.noreply.github.com>
Date: Tue, 14 May 2024 17:58:19 +0300
Subject: [PATCH] remove rhip (#4579)

* remove rhip

* remove hip runner
---
 .github/workflows/test.yml                  | 21 ++++++++-----------
 .../runtime => extra/backends}/ops_rhip.py  |  0
 test/helpers.py                             |  2 +-
 test/test_dtype_alu.py                      |  2 +-
 test/test_linearizer.py                     |  4 ++--
 test/test_randomness.py                     |  2 +-
 test/unit/test_disk_tensor.py               |  1 -
 7 files changed, 14 insertions(+), 18 deletions(-)
 rename {tinygrad/runtime => extra/backends}/ops_rhip.py (100%)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d6cdf7c5fe..5064f1062a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -332,7 +332,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        backend: [llvm, clang, gpu, cuda, hip, ptx, amd] #, triton]
+        backend: [llvm, clang, gpu, cuda, ptx, amd] #, triton]

     name: Tests on (${{ matrix.backend }})
     runs-on: ubuntu-latest
@@ -356,7 +356,7 @@
         path: ~/.cache/tinygrad/downloads/
         key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
     - name: Set env
-      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'hip' && 'RHIP=1\nFORWARD_ONLY=1' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
+      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
     - name: Install OpenCL
       if: matrix.backend == 'gpu'
       run: |
@@ -398,8 +398,8 @@
       run: |
         cd ${{ github.workspace }}/gpuocelot/ocelot/build
         sudo ninja install -d explain
-    - name: Install packages (hip)
-      if: matrix.backend == 'hip' || matrix.backend == 'amd'
+    - name: Install packages (amd)
+      if: matrix.backend == 'amd'
       run: |
         echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
         wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
@@ -416,7 +416,7 @@
       run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
     - name: Check Device.DEFAULT and print some source
      run: |
-        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','RHIP','AMD'], Device.DEFAULT"
+        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD'], Device.DEFAULT"
         DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
     - name: Verify OpenCL autogen
       if: matrix.backend == 'gpu'
       run: |
@@ -433,8 +433,8 @@
         ./autogen_stubs.sh nv
         diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
         diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
-    - name: Verify HIP autogen
-      if: matrix.backend == 'hip'
+    - name: Verify AMD autogen
+      if: matrix.backend == 'amd'
       run: |
         cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
         cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
@@ -442,8 +442,8 @@
         ./autogen_stubs.sh comgr
         diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
         diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
-    - name: Run pytest (not cuda or hip/amd)
-      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'hip' && matrix.backend != 'amd'
+    - name: Run pytest (not cuda or amd)
+      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd'
       run: python -m pytest -n=auto test/ --durations=20
     - name: Run ONNX (only LLVM)
       if: matrix.backend == 'llvm'
@@ -451,9 +451,6 @@
     - name: Run pytest (cuda)
       if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton'
       run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --durations=20
-    - name: Run pytest (hip)
-      if: matrix.backend=='hip'
-      run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hip_compile.py --durations=20
     - name: Run pytest (amd)
       if: matrix.backend=='amd'
       run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hcq.py --durations=20
diff --git a/tinygrad/runtime/ops_rhip.py b/extra/backends/ops_rhip.py
similarity index 100%
rename from tinygrad/runtime/ops_rhip.py
rename to extra/backends/ops_rhip.py
diff --git a/test/helpers.py b/test/helpers.py
index f1f39f9ce3..cf09bb6fe5 100644
--- a/test/helpers.py
+++ b/test/helpers.py
@@ -26,7 +26,7 @@ def assert_jit_cache_len(fxn, expected_len):
 def is_dtype_supported(dtype: DType, device: str = Device.DEFAULT):
   if dtype == dtypes.bfloat16:
     # NOTE: this requires bf16 buffer support
-    return device in {"RHIP", "HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
+    return device in {"HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
   if device in ["WEBGPU", "WEBGL"]: return dtype in [dtypes.float, dtypes.int32, dtypes.uint32]
   if device == "CUDA" and getenv("PTX") and dtype in (dtypes.int8, dtypes.uint8): return False
   # for CI GPU and OSX, cl_khr_fp16 isn't supported
diff --git a/test/test_dtype_alu.py b/test/test_dtype_alu.py
index 32f615dc9c..c3f8e27c00 100644
--- a/test/test_dtype_alu.py
+++ b/test/test_dtype_alu.py
@@ -145,7 +145,7 @@ class TestDTypeALU(unittest.TestCase):
   def test_int32_midcast_float(self, a, b, c, op1, op2):
     universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
   # Metal and CUDACPU and HIP behave differently than numpy in CI for overflows
-  skip_overflow = CI and (Device.DEFAULT in {"RHIP", "HSA", "AMD"} or getenv("CUDACPU"))
+  skip_overflow = CI and (Device.DEFAULT in {"HSA", "AMD"} or getenv("CUDACPU"))
   @given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
diff --git a/test/test_linearizer.py b/test/test_linearizer.py
index ff1536aabb..9c9654fff8 100644
--- a/test/test_linearizer.py
+++ b/test/test_linearizer.py
@@ -292,7 +292,7 @@ class TestLinearizer(unittest.TestCase):
       # check correctness
       helper_tc_allclose(tc.dims[0]+pad, tc.dims[1]+pad, tc.dims[2]+pad, tc.dtype_in, tc.dtype_out, tc_opt=2)

-  @unittest.skipIf(CI and Device.DEFAULT in {"RHIP", "AMD"}, "RHIP/AMD CI is really slow here")
+  @unittest.skipIf(CI and Device.DEFAULT in {"AMD"}, "AMD CI is really slow here")
   def test_tensor_cores_multi_reduce(self):
     if not Device[Device.DEFAULT].renderer.has_tensor_cores: self.skipTest("device doesn't have tensor cores")
@@ -852,7 +852,7 @@ class TestKernelOpts(unittest.TestCase):
     ], apply_tc=True, atol=atol, rtol=rtol)

   def test_padto_matmul(self):
-    if CI and Device.DEFAULT in ["CUDA", "RHIP", "AMD"]: self.skipTest("super slow on CUDA and RHIP because of the big grid dims")
+    if CI and Device.DEFAULT in ["CUDA", "AMD"]: self.skipTest("super slow on CUDA and AMD because of the big grid dims")
     N = 17 * 17
     Tensor.manual_seed(289)
     a = Tensor.rand(N, N)
diff --git a/test/test_randomness.py b/test/test_randomness.py
index e66beb4637..47b8d02cba 100644
--- a/test/test_randomness.py
+++ b/test/test_randomness.py
@@ -104,7 +104,7 @@ class TestRandomness(unittest.TestCase):
     self.assertTrue(equal_distribution(Tensor.randn, torch.randn, lambda x: np.random.randn(*x)))

   @given(strat.sampled_from([dtypes.float, dtypes.float16, dtypes.bfloat16]))
-  @unittest.skipIf(Device.DEFAULT in ["HSA", "RHIP", "AMD"], "bfloat16 local buffer broken in HSA")
+  @unittest.skipIf(Device.DEFAULT in ["HSA", "AMD"], "bfloat16 local buffer broken in HSA")
   def test_randn_finite(self, default_float):
     if not is_dtype_supported(default_float): return
     old_default_float = dtypes.default_float
diff --git a/test/unit/test_disk_tensor.py b/test/unit/test_disk_tensor.py
index 4017aa6eaf..982059e196 100644
--- a/test/unit/test_disk_tensor.py
+++ b/test/unit/test_disk_tensor.py
@@ -284,7 +284,6 @@ class TestDiskTensor(unittest.TestCase):
     ret = t.to("CLANG").bitcast(dtypes.uint16) + 1
     assert ret.tolist() == [2827, 3341, 3855, 4369, 4883]

-  @unittest.skipIf(Device.DEFAULT == "RHIP", "no real HIP device exists in CI")
   def test_bf16_disk_write_read(self):
     t = Tensor([10000, -1, -1000, -10000, 20], dtype=dtypes.float32)
     t.to(f"disk:{temp('f32')}").realize()