no amdgpu kernel driver (#10408)

* no amdgpu kernel driver * don't test hip * lower req
2026-01-09 23:18:04 -05:00 · 2025-05-18 20:52:39 -07:00
parent 4b1f1a47bb
commit b06291077c
2 changed files with 17 additions and 15 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -352,8 +352,10 @@ jobs:
    steps:
    - name: Checkout Code
      uses: actions/checkout@v4
-    - name: Insert amdgpu
-      run: sudo modprobe amdgpu
+    - name: Remove amdgpu
+      run: sudo rmmod amdgpu || true
+    #- name: Insert amdgpu
+    #  run: sudo modprobe amdgpu
    - name: Symlink models and datasets
      run: |
        mkdir -p weights
@@ -372,12 +374,12 @@ jobs:
        rm -f /tmp/staging.db /tmp/staging.db-shm /tmp/staging.db-wal
    - name: reset process replay
      run: test/external/process_replay/reset.py
-    - name: setup perflevel
-      run: |
-        examples/mlperf/training_submission_v4.1/tinycorp/benchmarks/bert/implementations/tinybox_red/setup.sh
-        rocm-smi
-    - name: Show off tinybox
-      run: /opt/rocm/bin/rocm-bandwidth-test
+    #- name: setup perflevel
+    #  run: |
+    #    examples/mlperf/training_submission_v4.1/tinycorp/benchmarks/bert/implementations/tinybox_red/setup.sh
+    #    rocm-smi
+    #- name: Show off tinybox
+    #  run: /opt/rocm/bin/rocm-bandwidth-test
    # TODO: unstable on AMD
    #- name: Run model inference benchmark
    #  run: LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 NOCLANG=1 python3 test/external/external_model_benchmark.py
@@ -397,13 +399,13 @@ jobs:
      run: AMD=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py | tee matmul_amd.txt
    - name: Test AMD=1
      run: DEBUG=2 AMD=1 python -m pytest -rA test/test_tiny.py
-    - name: Test HIP=1
-      run: DEBUG=2 HIP=1 python -m pytest -rA test/test_tiny.py
+    #- name: Test HIP=1
+    #  run: DEBUG=2 HIP=1 python -m pytest -rA test/test_tiny.py
    # TODO: AMD compiler bug causes this to fail
    #- name: Fuzz Padded Tensor Core GEMM
    #  run: HSA=1 M_START=12 M_STOP=20 M_STEP=1 N_START=12 N_STOP=20 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 DEBUG=2 python3 ./extra/gemm/fuzz_matmul.py
-    - name: Remove amdgpu
-      run: sleep 10 && sudo rmmod amdgpu # sleep a bit to let the driver unload the prev pid.
+    #- name: Remove amdgpu
+    #  run: sleep 10 && sudo rmmod amdgpu # sleep a bit to let the driver unload the prev pid.
    - name: Test AM cold start time
      run: time AMD=1 AM_RESET=1 python3 test/test_tiny.py TestTiny.test_plus
    - name: Test AM warm start time
@@ -428,8 +430,8 @@ jobs:
      run: BENCHMARK_LOG=llama3_beam_4gpu AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt
    # - name: Run LLaMA-3 8B on 6 GPUs
    #   run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_six_gpu.txt
-    - name: Restore amdgpu
-      run: sudo modprobe amdgpu
+    #- name: Restore amdgpu
+    #  run: sudo modprobe amdgpu
    # - name: Run LLaMA-2 70B
    #   run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0  --timing | tee llama_2_70B.txt
    - name: Run Mixtral 8x7B
--- a/test/external/speed_v_theoretical.py
+++ b/test/external/speed_v_theoretical.py
@@ -90,7 +90,7 @@ class TestKernelSpeed(unittest.TestCase):
  def test_conv_3x3_256_32_32_256_256(self): self._test_conv_3x3(256, 32, 32, 256, 256, nv_tflops=27, amd_tflops=20)

  # theoretical is nv_tflops=165, amd_tflops=123
-  def test_gemm_4096(self): self._test_matmul(4096, nv_tflops=115, amd_tflops=74)
+  def test_gemm_4096(self): self._test_matmul(4096, nv_tflops=115, amd_tflops=65)
  def test_gemm_8192(self): self._test_matmul(8192, nv_tflops=125, amd_tflops=65)

  # theoretical is nv_gbs=1008, amd_gbs=960