Merge commit 'ac9fa68d18c777e421bd3f6fb1ddcfd60b6fda33' into ifu-rebase-again

Conflicts:
	.gitignore
	.gitmodules
	README.md
	bin/triton-translate.cpp
	include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td
	include/triton/Target/AMDGCN/AMDGCNTranslation.h
	include/triton/Target/HSACO/HSACOTranslation.h
	lib/Analysis/Allocation.cpp
	lib/Analysis/Utility.cpp
	lib/Conversion/TritonGPUToLLVM/CMakeLists.txt
	lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp
	lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp
	lib/Conversion/TritonGPUToLLVM/ScanOpToLLVM.cpp
	lib/Conversion/TritonGPUToLLVM/Utility.cpp
	lib/Conversion/TritonGPUToLLVM/Utility.h
	lib/Dialect/TritonGPU/IR/Dialect.cpp
	lib/Dialect/TritonGPU/Transforms/RemoveLayoutConversions.cpp
	lib/Target/HSACO/CMakeLists.txt
	lib/Target/HSACO/HSACOTranslation.cpp
	lib/Target/LLVMIR/LLVMIRTranslation.cpp
	python/src/triton.cc
	python/test/unit/language/test_core.py
	python/test/unit/operators/test_flash_attention.py
	python/triton/compiler/compiler.py
	python/triton/compiler/make_launcher.py
	python/triton/language/semantic.py
	python/triton/runtime/jit.py
	python/tutorials/06-fused-attention.py
	python/tutorials/11-grouped-gemm.py
	test/Conversion/tritongpu_to_llvm.mlir
This commit is contained in:
Jason Furmanek
2023-11-06 23:10:10 +00:00
161 changed files with 6530 additions and 3905 deletions

View File

@@ -25,6 +25,7 @@ jobs:
pip3 install tabulate
pip3 install cmake
pip3 install sphinx
pip3 install myst_parser
#- name: Fetch dependent branches
# run: |

View File

@@ -33,6 +33,7 @@ jobs:
echo '::set-output name=matrix-optional::["ubuntu-latest"]'
fi
Integration-Tests-Nvidia:
needs: Runner-Preparation
@@ -44,14 +45,14 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
submodules: 'true'
- name: Set CUDA ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
echo "BACKEND=CUDA" >> "${GITHUB_ENV}"
echo "ENABLE_TMA=0" >> "${GITHUB_ENV}"
echo "ENABLE_MMA_V3=0" >> "${GITHUB_ENV}"
echo "TRITON_DISABLE_LINE_INFO=1" >> "${GITHUB_ENV}"
- name: Clear cache
@@ -88,24 +89,26 @@ jobs:
fi
lit -v "${LIT_TEST_DIR}"
- name: Enable MMAV3 and TMA
- name: Enable TMA
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'H100')}}
run: |
echo "ENABLE_TMA=1" >> "${GITHUB_ENV}"
echo "ENABLE_MMA_V3=1" >> "${GITHUB_ENV}"
- name: Run python tests on CUDA with ENABLE_TMA=1 and ENABLE_MMA_V3=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1'}}
- name: Run python tests on CUDA with ENABLE_TMA=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 --ignore=runtime --ignore=operators --ignore=language/test_line_info.py
python3 -m pytest -n 8 --ignore=runtime --ignore=operators --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
python3 -m pytest -n 8 language/test_subprocess.py
# run runtime tests serially to avoid race condition with cache handling.
python3 -m pytest runtime/
# run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
#run hopper/test_flashattention.py to avoid out of gpu memory
python3 -m pytest hopper/test_flashattention.py
- name: Run python tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0'}}
- name: Run python tests on CUDA with ENABLE_TMA=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 --ignore=runtime --ignore=hopper --ignore=operators --ignore=language/test_line_info.py
@@ -118,14 +121,22 @@ jobs:
run: |
rm -rf ~/.triton
- name: Run partial tests on CUDA with ENABLE_TMA=1 and ENABLE_MMA_V3=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1'}}
- name: Run interpreter tests
env:
# TRITON_INTERPRET: "1"
CUA_VISIBLE_DEVICES: ""
run: |
cd python/test/unit
python3 -m pytest -vs operators/test_flash_attention.py
- name: Run partial tests on CUDA with ENABLE_TMA=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 operators
- name: Run partial tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0'}}
- name: Run partial tests on CUDA with ENABLE_TMA=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0'}}
run: |
cd python/test/unit
python3 -m pytest -n 8 operators
@@ -160,6 +171,50 @@ jobs:
python3 -m pytest -vs . --reruns 10
sudo nvidia-smi -i 0 -rgc
Integration-Tests-Shared-Middle-Layer:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Clear cache
run: |
rm -rf ~/.triton
- name: Update PATH
run: |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
- name: Check pre-commit
run: |
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files --verbose
- name: Install Triton
run: |
export TRITON_CODEGEN_TRITON_SHARED=1
git submodule update --init --recursive
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install ninja
python3 -m pip uninstall -y triton
python3 setup.py build
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Run shared middle-layer lit tests
run: |
python3 -m pip install lit
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/third_party/triton_shared/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
Integration-Tests-Third-Party:
needs: Runner-Preparation
if: false

View File

@@ -46,8 +46,8 @@ jobs:
export CIBW_MANYLINUX_X86_64_IMAGE="quay.io/pypa/manylinux2014_x86_64:latest"
#export CIBW_MANYLINUX_PYPY_X86_64_IMAGE="quay.io/pypa/manylinux2014_x86_64:latest"
export CIBW_BEFORE_BUILD="pip install cmake;"
export CIBW_SKIP="{cp,pp}{35,36}-*"
export CIBW_BUILD="{cp,pp}3*-manylinux_x86_64"
export CIBW_SKIP="cp{35,36}-*"
export CIBW_BUILD="cp3*-manylinux_x86_64"
python3 -m cibuildwheel python --output-dir wheelhouse
- name: Install Azure CLI