[TESTS] fix flash attention (#2086)

Co-authored-by: dongdongl <dongdongl@nvidia.com>
Author: Dongdong Li
Date: 2023-09-20 14:23:46 +08:00
Committed by: GitHub
Parent: 363182928c
Commit: e5eda098b3
4 changed files with 282 additions and 20 deletions

@@ -105,6 +105,8 @@ jobs:
 python3 -m pytest runtime/
 # run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
 TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
+# run hopper/test_flashattention.py separately to avoid running out of GPU memory
+python3 -m pytest hopper/test_flashattention.py
 - name: Run python tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
   if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0' }}
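For context, the two added lines live inside the shell script of a run: block in the CUDA test step of the CI workflow. A minimal sketch of how such a step might look follows; the step name, the if: condition values, the working directory, and the surrounding structure are assumptions for illustration, not taken from this commit's workflow file.

# Hypothetical workflow step (step name, condition, and paths are assumed, not copied from the repo).
- name: Run python tests on CUDA
  if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1' }}
  run: |
    cd python/test/unit
    python3 -m pytest runtime/
    # run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
    TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
    # run hopper/test_flashattention.py separately to avoid running out of GPU memory
    python3 -m pytest hopper/test_flashattention.py

Invoking the flash-attention test as its own pytest process means any GPU memory held by the earlier test processes is released when they exit, before the memory-heavy Hopper tests start.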