[TESTS] fix flash attention (#2086)

Co-authored-by: dongdongl <dongdongl@nvidia.com>
Author: Dongdong Li
Date: 2023-09-20 14:23:46 +08:00
Committed by: GitHub
Parent: 363182928c
Commit: e5eda098b3
4 changed files with 282 additions and 20 deletions

@@ -105,6 +105,8 @@ jobs:
 python3 -m pytest runtime/
 # run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
 TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
+# run hopper/test_flashattention.py separately to avoid running out of GPU memory
+python3 -m pytest hopper/test_flashattention.py
 - name: Run python tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
   if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0' }}
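For context, the two added lines live inside the shell script of a run: block in the CUDA test step of the CI workflow. A minimal sketch of how such a step might look follows; the step name, the if: condition values, the working directory, and the surrounding structure are assumptions for illustration, not taken from this commit's workflow file.

# Hypothetical workflow step (step name, condition, and paths are assumed, not copied from the repo).
- name: Run python tests on CUDA
  if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1' }}
  run: |
    cd python/test/unit
    python3 -m pytest runtime/
    # run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
    TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
    # run hopper/test_flashattention.py separately to avoid running out of GPU memory
    python3 -m pytest hopper/test_flashattention.py

Invoking the flash-attention test as its own pytest process means any GPU memory held by the earlier test processes is released when they exit, before the memory-heavy Hopper tests start.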