mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
Merge commit 'cb3d79a185e40c9d8a579bea07747a8a8d157d52' into ifu-231117
Conflicts: lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVM.cpp lib/Dialect/TritonGPU/IR/Dialect.cpp python/setup.py python/test/unit/language/assert_helper.py python/test/unit/operators/test_flash_attention.py python/test/unit/runtime/test_subproc.py python/triton/compiler/compiler.py python/triton/language/semantic.py python/triton/runtime/autotuner.py python/triton/runtime/jit.py python/tutorials/03-matrix-multiplication.py python/tutorials/05-layer-norm.py python/tutorials/06-fused-attention.py python/tutorials/11-grouped-gemm.py test/Conversion/tritongpu_to_llvm.mlir
This commit is contained in:
146
.github/workflows/integration-tests.yml
vendored
146
.github/workflows/integration-tests.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
fi
|
||||
|
||||
|
||||
Integration-Tests-Nvidia:
|
||||
Integration-Tests:
|
||||
needs: Runner-Preparation
|
||||
|
||||
runs-on: ${{ matrix.runner }}
|
||||
@@ -73,10 +73,10 @@ jobs:
|
||||
run: |
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
python3 -m pip install ninja
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
python3 -m pip install pytest-xdist
|
||||
python3 -m pip install cmake==3.24 ninja pytest-xdist
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y ccache clang lld
|
||||
TRITON_BUILD_WITH_CLANG_LLD=true TRITON_BUILD_WITH_CCACHE=true python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Run lit tests
|
||||
if: ${{ env.BACKEND == 'CUDA'}}
|
||||
@@ -171,140 +171,8 @@ jobs:
|
||||
python3 -m pytest -vs . --reruns 10
|
||||
sudo nvidia-smi -i 0 -rgc
|
||||
|
||||
Integration-Tests-Shared-Middle-Layer:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Clear cache
|
||||
run: |
|
||||
rm -rf ~/.triton
|
||||
|
||||
- name: Update PATH
|
||||
run: |
|
||||
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check pre-commit
|
||||
run: |
|
||||
python3 -m pip install --upgrade pre-commit
|
||||
python3 -m pre_commit run --all-files --verbose
|
||||
|
||||
- name: Install Triton
|
||||
run: |
|
||||
export TRITON_CODEGEN_TRITON_SHARED=1
|
||||
git submodule update --init --recursive
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
python3 -m pip install ninja
|
||||
python3 -m pip uninstall -y triton
|
||||
python3 setup.py build
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Run shared middle-layer lit tests
|
||||
run: |
|
||||
python3 -m pip install lit
|
||||
cd python
|
||||
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/third_party/triton_shared/test"
|
||||
if [ ! -d "${LIT_TEST_DIR}" ]; then
|
||||
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
|
||||
fi
|
||||
lit -v "${LIT_TEST_DIR}"
|
||||
|
||||
|
||||
Integration-Tests-Third-Party:
|
||||
needs: Runner-Preparation
|
||||
if: false
|
||||
|
||||
runs-on: ${{ matrix.runner }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-optional)}}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set ROCM ENV
|
||||
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'gfx908')}}
|
||||
run: |
|
||||
echo "BACKEND=ROCM" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set XPU ENV
|
||||
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'arc770')}}
|
||||
run: |
|
||||
echo "BACKEND=XPU" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Clear cache
|
||||
run: |
|
||||
rm -rf ~/.triton
|
||||
|
||||
- name: Update PATH
|
||||
run: |
|
||||
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check pre-commit
|
||||
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] != 'arc770') }}
|
||||
run: |
|
||||
python3 -m pip install --upgrade pre-commit
|
||||
python3 -m pre_commit run --all-files --verbose
|
||||
|
||||
- name: Check pre-commit arc770
|
||||
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] == 'arc770') }}
|
||||
run: |
|
||||
source ${HOME}/triton_vars.sh
|
||||
source ${HOME}/miniconda3/bin/activate
|
||||
conda activate triton-xpu-ci
|
||||
python3 -m pip install --upgrade pre-commit
|
||||
python3 -m pre_commit run --all-files
|
||||
|
||||
- name: Install Triton on ROCM
|
||||
if: ${{ env.BACKEND == 'ROCM'}}
|
||||
run: |
|
||||
git submodule update --init --recursive
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
python3 -m pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
|
||||
export TRITON_CODEGEN_AMD_HIP_BACKEND=1
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Install Triton on XPU
|
||||
if: ${{ env.BACKEND == 'XPU'}}
|
||||
run: |
|
||||
source ${HOME}/triton_vars.sh
|
||||
source ${HOME}/miniconda3/bin/activate
|
||||
conda activate triton-xpu-ci
|
||||
git submodule update --init --recursive
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
export TRITON_CODEGEN_INTEL_XPU_BACKEND=1
|
||||
python3 -m pip uninstall -y triton
|
||||
python3 setup.py build
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Run python tests on ROCM
|
||||
if: ${{ env.BACKEND == 'ROCM'}}
|
||||
run: |
|
||||
cd python/test/unit/language
|
||||
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py"
|
||||
|
||||
- name: Run python tests on XPU
|
||||
if: ${{ env.BACKEND == 'XPU'}}
|
||||
run: |
|
||||
source ${HOME}/triton_vars.sh
|
||||
source ${HOME}/miniconda3/bin/activate
|
||||
conda activate triton-xpu-ci
|
||||
cd python/test/backend/third_party_backends
|
||||
python3 -m pytest --capture=tee-sys -rfs --verbose --backend xpu
|
||||
|
||||
Compare-artifacts:
|
||||
needs: Integration-Tests-Nvidia
|
||||
needs: Integration-Tests
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -333,7 +201,7 @@ jobs:
|
||||
- name: Download latest main artifacts
|
||||
env:
|
||||
ARTIFACT_NAME: artifacts A100
|
||||
ARTIFACT_JOB_NAME: Integration-Tests-Nvidia
|
||||
ARTIFACT_JOB_NAME: Integration-Tests
|
||||
MAX_NUM_ACTIONS_PAGES: 30
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
|
||||
58
.github/workflows/llvm-build.yml
vendored
58
.github/workflows/llvm-build.yml
vendored
@@ -18,43 +18,17 @@ permissions:
|
||||
jobs:
|
||||
|
||||
build:
|
||||
name: Build on ${{ matrix.config.runner }}
|
||||
runs-on: ${{ matrix.config.runs_on }}
|
||||
|
||||
strategy:
|
||||
fail-fast: true
|
||||
|
||||
matrix:
|
||||
platform: [
|
||||
ubuntu-20.04-x64,
|
||||
ubuntu-22.04-x64,
|
||||
centos-7-x64,
|
||||
macos-x64,
|
||||
macos-arm64
|
||||
]
|
||||
|
||||
include:
|
||||
# Specify OS versions
|
||||
- platform: ubuntu-20.04-x64
|
||||
host-os: ubuntu-20.04
|
||||
target-os: ubuntu
|
||||
arch: x64
|
||||
- platform: ubuntu-22.04-x64
|
||||
host-os: ubuntu-22.04
|
||||
target-os: ubuntu
|
||||
arch: x64
|
||||
- platform: centos-7-x64
|
||||
host-os: ubuntu-22.04
|
||||
target-os: centos
|
||||
arch: x64
|
||||
- platform: macos-x64
|
||||
host-os: macos-12
|
||||
target-os: macos
|
||||
arch: x64
|
||||
- platform: macos-arm64
|
||||
host-os: macos-12
|
||||
target-os: macos
|
||||
arch: arm64
|
||||
|
||||
runs-on: ${{ matrix.host-os }}
|
||||
config:
|
||||
- {runner: 'Ubuntu 20.04', runs_on: 'ubuntu-20.04', target-os: 'ubuntu', arch: 'x64'}
|
||||
- {runner: 'CentOS 7', runs_on: ['self-hosted', 'CPU'], target-os: 'centos', arch: 'x64'}
|
||||
- {runner: 'MacOS X64', runs_on: 'macos-12', target-os: 'macos', arch: 'x64'}
|
||||
- {runner: 'MacOS ARM64', runs_on: 'macos-12', target-os: 'macos', arch: 'arm64'}
|
||||
|
||||
steps:
|
||||
|
||||
@@ -73,7 +47,7 @@ jobs:
|
||||
echo "Short LLVM commit hash: ${SHORT_LLVM_COMMIT_HASH}"
|
||||
echo "short_llvm_commit_hash=${SHORT_LLVM_COMMIT_HASH}" >> ${GITHUB_ENV}
|
||||
|
||||
INSTALL_DIR="llvm-${SHORT_LLVM_COMMIT_HASH}-${{ matrix.platform }}"
|
||||
INSTALL_DIR="llvm-${SHORT_LLVM_COMMIT_HASH}-${{ matrix.config.target-os }}-${{ matrix.config.arch }}"
|
||||
echo "LLVM installation directory name: ${INSTALL_DIR}"
|
||||
echo "llvm_install_dir=${INSTALL_DIR}" >> ${GITHUB_ENV}
|
||||
|
||||
@@ -99,11 +73,11 @@ jobs:
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ${{ env.SCCACHE_DIR }}
|
||||
key: ${{ matrix.platform }}-${{ env.short_llvm_commit_hash }}
|
||||
restore-keys: ${{ matrix.platform }}-
|
||||
key: ${{ matrix.config.target-os }}-${{ matrix.config.arch }}-${{ env.short_llvm_commit_hash }}
|
||||
restore-keys: ${{ matrix.config.target-os }}-${{ matrix.config.arch }}-
|
||||
|
||||
- name: Configure, Build, Test, and Install LLVM (Ubuntu and macOS x64)
|
||||
if: matrix.arch == 'x64' && contains(fromJSON('["ubuntu", "macos"]'), matrix.target-os)
|
||||
if: matrix.config.arch == 'x64' && (matrix.config.target-os == 'ubuntu' || matrix.config.target-os == 'macos')
|
||||
run: >
|
||||
python3 -m pip install -r llvm-project/mlir/python/requirements.txt
|
||||
|
||||
@@ -114,11 +88,13 @@ jobs:
|
||||
-DCMAKE_INSTALL_PREFIX="${{ env.llvm_install_dir }}"
|
||||
-DCMAKE_LINKER=lld
|
||||
-DLLVM_BUILD_UTILS=ON
|
||||
-DLLVM_BUILD_TOOLS=ON
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON
|
||||
-DMLIR_ENABLE_BINDINGS_PYTHON=ON
|
||||
-DLLVM_ENABLE_PROJECTS=mlir
|
||||
-DLLVM_INSTALL_UTILS=ON
|
||||
-DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU"
|
||||
-DLLVM_ENABLE_TERMINFO=OFF
|
||||
llvm-project/llvm
|
||||
|
||||
ninja -C llvm-project/build check-mlir install
|
||||
@@ -126,7 +102,7 @@ jobs:
|
||||
tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
|
||||
|
||||
- name: Configure, Build, and Install LLVM (macOS arm64)
|
||||
if: matrix.arch == 'arm64' && matrix.target-os == 'macos'
|
||||
if: matrix.config.arch == 'arm64' && matrix.config.target-os == 'macos'
|
||||
run: >
|
||||
python3 -m pip install -r llvm-project/mlir/python/requirements.txt
|
||||
|
||||
@@ -138,6 +114,7 @@ jobs:
|
||||
-DCMAKE_LINKER=lld
|
||||
-DCMAKE_OSX_ARCHITECTURES=arm64
|
||||
-DLLVM_BUILD_UTILS=ON
|
||||
-DLLVM_BUILD_TOOLS=ON
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON
|
||||
-DMLIR_ENABLE_BINDINGS_PYTHON=ON
|
||||
-DLLVM_ENABLE_PROJECTS=mlir
|
||||
@@ -145,6 +122,7 @@ jobs:
|
||||
-DLLVM_INSTALL_UTILS=ON
|
||||
-DLLVM_TARGETS_TO_BUILD="AArch64"
|
||||
-DLLVM_USE_HOST_TOOLS=ON
|
||||
-DLLVM_ENABLE_TERMINFO=OFF
|
||||
llvm-project/llvm
|
||||
|
||||
ninja -C llvm-project/build install
|
||||
@@ -152,10 +130,10 @@ jobs:
|
||||
tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
|
||||
|
||||
- name: Configure, Build, Test, and Install LLVM (CentOS)
|
||||
if: matrix.target-os == 'centos'
|
||||
if: matrix.config.target-os == 'centos'
|
||||
run: |
|
||||
docker build --tag llvm-build --build-arg llvm_dir=llvm-project \
|
||||
-f llvm-build/.github/workflows/Dockerfile .
|
||||
-f llvm-build/.github/workflows/llvm-build/Dockerfile .
|
||||
|
||||
# Create temporary container to copy cache and installed artifacts.
|
||||
CONTAINER_ID=$(docker create llvm-build)
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
FROM centos:7
|
||||
ARG llvm_dir=llvm-project
|
||||
|
||||
# Add the cache artifacts and the LLVM source tree to the container
|
||||
ADD sccache /sccache
|
||||
ADD "${llvm_dir}" /source/llvm-project
|
||||
ENV SCCACHE_DIR="/sccache"
|
||||
ENV SCCACHE_CACHE_SIZE="2G"
|
||||
|
||||
RUN echo -e "[llvmtoolset-build]\nname=LLVM Toolset 13.0 - Build\nbaseurl=https://buildlogs.centos.org/c7-llvm-toolset-13.0.x86_64/\ngpgcheck=0\nenabled=1" > /etc/yum.repos.d/llvmtoolset-build.repo
|
||||
# Install build dependencies
|
||||
RUN yum install --assumeyes centos-release-scl
|
||||
RUN yum install --assumeyes devtoolset-9-gcc* python3-devel python3-pip
|
||||
SHELL [ "/usr/bin/scl", "enable", "devtoolset-9" ]
|
||||
RUN yum install --assumeyes --nogpgcheck llvm-toolset-13.0
|
||||
RUN yum install --assumeyes rh-python38-python-devel rh-python38-python-pip
|
||||
SHELL [ "/usr/bin/scl", "enable", "llvm-toolset-13.0", "rh-python38" ]
|
||||
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN python3 -m pip install --upgrade cmake ninja sccache
|
||||
@@ -21,17 +22,22 @@ RUN python3 -m pip install -r /source/llvm-project/mlir/python/requirements.txt
|
||||
# Configure, Build, Test, and Install LLVM
|
||||
RUN cmake -GNinja -Bbuild \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_C_COMPILER=gcc \
|
||||
-DCMAKE_CXX_COMPILER=g++ \
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_CXX_COMPILER=clang++ \
|
||||
-DCMAKE_ASM_COMPILER=clang \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=sccache \
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
|
||||
-DCMAKE_CXX_FLAGS="-Wno-everything" \
|
||||
-DCMAKE_LINKER=lld \
|
||||
-DCMAKE_INSTALL_PREFIX="/install" \
|
||||
-DLLVM_BUILD_UTILS=ON \
|
||||
-DLLVM_BUILD_TOOLS=ON \
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON \
|
||||
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
|
||||
-DLLVM_ENABLE_PROJECTS=mlir \
|
||||
-DLLVM_ENABLE_TERMINFO=OFF \
|
||||
-DLLVM_INSTALL_UTILS=ON \
|
||||
-DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" \
|
||||
/source/llvm-project/llvm
|
||||
|
||||
RUN ninja -C build check-mlir install
|
||||
RUN ninja -C build install
|
||||
167
.github/workflows/third-party/integration-tests.yml
vendored
Normal file
167
.github/workflows/third-party/integration-tests.yml
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
name: Integration Tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
merge_group:
|
||||
branches: [main]
|
||||
types: [checks_requested]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
|
||||
|
||||
env:
|
||||
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
|
||||
|
||||
jobs:
|
||||
Runner-Preparation:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix-required: ${{ steps.set-matrix.outputs.matrix-required }}
|
||||
matrix-optional: ${{ steps.set-matrix.outputs.matrix-optional }}
|
||||
steps:
|
||||
- name: Prepare runner matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
|
||||
echo '::set-output name=matrix-required::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
|
||||
echo '::set-output name=matrix-optional::[["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
|
||||
else
|
||||
echo '::set-output name=matrix-required::["ubuntu-latest"]'
|
||||
echo '::set-output name=matrix-optional::["ubuntu-latest"]'
|
||||
fi
|
||||
|
||||
|
||||
Integration-Tests-Shared-Middle-Layer:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Clear cache
|
||||
run: |
|
||||
rm -rf ~/.triton
|
||||
|
||||
- name: Update PATH
|
||||
run: |
|
||||
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check pre-commit
|
||||
run: |
|
||||
python3 -m pip install --upgrade pre-commit
|
||||
python3 -m pre_commit run --all-files --verbose
|
||||
|
||||
- name: Install Triton
|
||||
run: |
|
||||
export TRITON_CODEGEN_TRITON_SHARED=1
|
||||
git submodule update --init --recursive
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
python3 -m pip install ninja
|
||||
python3 -m pip uninstall -y triton
|
||||
python3 setup.py build
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Run shared middle-layer lit tests
|
||||
run: |
|
||||
python3 -m pip install lit
|
||||
cd python
|
||||
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/third_party/triton_shared/test"
|
||||
if [ ! -d "${LIT_TEST_DIR}" ]; then
|
||||
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
|
||||
fi
|
||||
lit -v "${LIT_TEST_DIR}"
|
||||
|
||||
|
||||
Integration-Tests-Third-Party:
|
||||
needs: Runner-Preparation
|
||||
if: false
|
||||
|
||||
runs-on: ${{ matrix.runner }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-optional)}}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set ROCM ENV
|
||||
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'gfx908')}}
|
||||
run: |
|
||||
echo "BACKEND=ROCM" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set XPU ENV
|
||||
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'arc770')}}
|
||||
run: |
|
||||
echo "BACKEND=XPU" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Clear cache
|
||||
run: |
|
||||
rm -rf ~/.triton
|
||||
|
||||
- name: Update PATH
|
||||
run: |
|
||||
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check pre-commit
|
||||
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] != 'arc770') }}
|
||||
run: |
|
||||
python3 -m pip install --upgrade pre-commit
|
||||
python3 -m pre_commit run --all-files --verbose
|
||||
|
||||
- name: Check pre-commit arc770
|
||||
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] == 'arc770') }}
|
||||
run: |
|
||||
source ${HOME}/triton_vars.sh
|
||||
source ${HOME}/miniconda3/bin/activate
|
||||
conda activate triton-xpu-ci
|
||||
python3 -m pip install --upgrade pre-commit
|
||||
python3 -m pre_commit run --all-files
|
||||
|
||||
- name: Install Triton on ROCM
|
||||
if: ${{ env.BACKEND == 'ROCM'}}
|
||||
run: |
|
||||
git submodule update --init --recursive
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
python3 -m pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
|
||||
export TRITON_CODEGEN_AMD_HIP_BACKEND=1
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Install Triton on XPU
|
||||
if: ${{ env.BACKEND == 'XPU'}}
|
||||
run: |
|
||||
source ${HOME}/triton_vars.sh
|
||||
source ${HOME}/miniconda3/bin/activate
|
||||
conda activate triton-xpu-ci
|
||||
git submodule update --init --recursive
|
||||
cd python
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install cmake==3.24
|
||||
export TRITON_CODEGEN_INTEL_XPU_BACKEND=1
|
||||
python3 -m pip uninstall -y triton
|
||||
python3 setup.py build
|
||||
python3 -m pip install --no-build-isolation -vvv '.[tests]'
|
||||
|
||||
- name: Run python tests on ROCM
|
||||
if: ${{ env.BACKEND == 'ROCM'}}
|
||||
run: |
|
||||
cd python/test/unit/language
|
||||
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py"
|
||||
|
||||
- name: Run python tests on XPU
|
||||
if: ${{ env.BACKEND == 'XPU'}}
|
||||
run: |
|
||||
source ${HOME}/triton_vars.sh
|
||||
source ${HOME}/miniconda3/bin/activate
|
||||
conda activate triton-xpu-ci
|
||||
cd python/test/backend/third_party_backends
|
||||
python3 -m pytest --capture=tee-sys -rfs --verbose --backend xpu
|
||||
16
.github/workflows/torch-inductor-tests.yml
vendored
16
.github/workflows/torch-inductor-tests.yml
vendored
@@ -1,7 +1,9 @@
|
||||
name: Torchinductor
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
workflow_run:
|
||||
workflows: ["Wheel"]
|
||||
types: [completed]
|
||||
|
||||
jobs:
|
||||
Runner-Preparation:
|
||||
@@ -23,17 +25,17 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
#- name: Packages
|
||||
# run: |
|
||||
# ./.github/workflows/torchinductor/scripts/install_torchinductor.sh
|
||||
- name: Packages
|
||||
run: |
|
||||
./.github/workflows/torch-inductor/scripts/install_torchinductor.sh torchbench
|
||||
- name: Environment
|
||||
run: |
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
./.github/workflows/torchinductor/scripts/install_triton.sh
|
||||
./.github/workflows/torch-inductor/scripts/install_triton.sh
|
||||
- name: Performance
|
||||
run: |
|
||||
./.github/workflows/torchinductor/scripts/run_torchinductor_perf.sh
|
||||
./.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh torchbench
|
||||
# Runs too long time
|
||||
#- name: Accuracy
|
||||
# run: |
|
||||
# ./.github/workflows/torchinductor/scripts/run_torchinductor_acc.sh
|
||||
# ./.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
|
||||
|
||||
@@ -3,8 +3,7 @@ import csv
|
||||
from collections import namedtuple
|
||||
|
||||
# Create a named tuple for the output of the benchmark
|
||||
BenchmarkOutput = namedtuple(
|
||||
'BenchmarkOutput', ['dev', 'name', 'batch_size', 'speedup', 'latency'])
|
||||
BenchmarkOutput = namedtuple('BenchmarkOutput', ['dev', 'name', 'batch_size', 'speedup', 'latency'])
|
||||
|
||||
|
||||
def parse_output(file_path: str) -> dict:
|
||||
@@ -19,13 +18,11 @@ def parse_output(file_path: str) -> dict:
|
||||
batch_size = row[2]
|
||||
speedup = float(row[3])
|
||||
latency = float(row[4])
|
||||
entries[name] = BenchmarkOutput(
|
||||
dev, name, batch_size, speedup, latency)
|
||||
entries[name] = BenchmarkOutput(dev, name, batch_size, speedup, latency)
|
||||
return entries
|
||||
|
||||
|
||||
def compare(baseline: dict, new: dict, threshold: float,
|
||||
geomean_threshold: float) -> bool:
|
||||
def compare(baseline: dict, new: dict, threshold: float, geomean_threshold: float) -> bool:
|
||||
baseline_geomean = 1.0
|
||||
new_geomean = 1.0
|
||||
for key in new:
|
||||
@@ -33,20 +30,27 @@ def compare(baseline: dict, new: dict, threshold: float,
|
||||
print(f"New benchmark {key} not found in baseline")
|
||||
baseline_latency = baseline[key].latency
|
||||
new_latency = new[key].latency
|
||||
if baseline_latency == 0:
|
||||
print(f"Baseline latency for {key} is 0")
|
||||
continue
|
||||
elif new_latency == 0:
|
||||
print(f"New latency for {key} is 0")
|
||||
continue
|
||||
|
||||
if new_latency < baseline_latency * (1 - threshold):
|
||||
print(
|
||||
f"New benchmark {key} is faster than baseline: {new_latency} vs {baseline_latency}")
|
||||
print(f"New benchmark {key} is faster than baseline: {new_latency} vs {baseline_latency}")
|
||||
elif new_latency > baseline_latency * (1 + threshold):
|
||||
print(
|
||||
f"New benchmark {key} is slower than baseline: {new_latency} vs {baseline_latency}")
|
||||
print(f"New benchmark {key} is slower than baseline: {new_latency} vs {baseline_latency}")
|
||||
else:
|
||||
print(f"New benchmark {key} is within threshold: {new_latency} vs {baseline_latency}")
|
||||
baseline_geomean *= baseline[key].speedup
|
||||
new_geomean *= new[key].speedup
|
||||
|
||||
baseline_geomean = baseline_geomean ** (1 / len(baseline))
|
||||
new_geomean = new_geomean ** (1 / len(new))
|
||||
baseline_geomean = baseline_geomean**(1 / len(baseline))
|
||||
new_geomean = new_geomean**(1 / len(new))
|
||||
print(f"Baseline geomean: {baseline_geomean}")
|
||||
print(f"New geomean: {new_geomean}")
|
||||
assert new_geomean > baseline_geomean * (1 - geomean_threshold), \
|
||||
assert new_geomean >= baseline_geomean * (1 - geomean_threshold), \
|
||||
f"New geomean is slower than baseline: {new_geomean} vs {baseline_geomean}"
|
||||
|
||||
|
||||
70
.github/workflows/torch-inductor/scripts/install_torchinductor.sh
vendored
Executable file
70
.github/workflows/torch-inductor/scripts/install_torchinductor.sh
vendored
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/bin/bash
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
MODEL_SPEC=$1
|
||||
|
||||
# torchinductor venv
|
||||
whoami
|
||||
# clean up old venv
|
||||
rm -rf /opt/torchinductor_venv
|
||||
python3 -m venv /opt/torchinductor_venv
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source ./.github/workflows/torch-inductor/scripts/common.sh
|
||||
|
||||
# pytorch nightly
|
||||
pip3 install --force-reinstall --pre torch torchtext torchvision torchaudio torchrec --extra-index-url https://download.pytorch.org/whl/nightly/cu121
|
||||
# pytorch source to get torchbench for dynamo
|
||||
cd /opt || exit
|
||||
# cleanup old pytorch
|
||||
rm -rf pytorch
|
||||
git clone --recursive https://github.com/pytorch/pytorch
|
||||
cd pytorch || exit
|
||||
# if you are updating an existing checkout
|
||||
git submodule sync
|
||||
git submodule update --init --recursive
|
||||
cd ..
|
||||
|
||||
# required packages
|
||||
# https://github.com/pytorch/benchmark/blob/main/docker/gcp-a100-runner-dind.dockerfile#L17
|
||||
sudo apt-get install --yes libpango-1.0-0 libpangoft2-1.0-0
|
||||
pip3 install --upgrade pip
|
||||
pip3 install expecttest psutil lightning-utilities pyre_extensions
|
||||
|
||||
# torchbench
|
||||
if [ "$MODEL_SPEC" == "torchbench" ] || [ "$MODEL_SPEC" != "all" ]; then
|
||||
# clean up old torchbench
|
||||
rm -rf benchmark
|
||||
pip3 install pyyaml
|
||||
git clone https://github.com/pytorch/benchmark.git
|
||||
cd benchmark || exit
|
||||
python3 install.py
|
||||
cd ..
|
||||
fi
|
||||
|
||||
# timm
|
||||
if [ "$MODEL_SPEC" == "timm_models" ] || [ "$MODEL_SPEC" != "all" ]; then
|
||||
# clean up old timm
|
||||
rm -rf pytorch-image-models
|
||||
git clone https://github.com/huggingface/pytorch-image-models.git
|
||||
cd pytorch-image-models || exit
|
||||
pip3 install -e .
|
||||
cd ..
|
||||
fi
|
||||
|
||||
# build our own triton
|
||||
cd "$ROOT" || exit
|
||||
cd python || exit
|
||||
rm -rf build
|
||||
pip3 install -e .
|
||||
pip3 uninstall pytorch-triton -y
|
||||
|
||||
# clean up cache
|
||||
rm -rf /tmp/torchinductor_root/
|
||||
rm -rf ~/.triton/cache
|
||||
rm -rf "$TEST_REPORTS_DIR"
|
||||
|
||||
# go back to where we started
|
||||
cd "$ROOT" || exit
|
||||
@@ -6,7 +6,7 @@ ROOT="$(pwd)"
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source ./.github/workflows/torchinductor/scripts/common.sh
|
||||
source ./.github/workflows/torch-inductor/scripts/common.sh
|
||||
|
||||
# build our own triton
|
||||
cd python || exit
|
||||
@@ -2,7 +2,8 @@
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
INDUCTOR="$ROOT"/.github/workflows/torchinductor
|
||||
INDUCTOR="$ROOT"/.github/workflows/torch-inductor
|
||||
MODEL_SPEC=$1
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
@@ -14,6 +15,9 @@ TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Running accuracy test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --device cuda \
|
||||
--output "$TEST_REPORTS_DIR"/inference_"$model".csv
|
||||
@@ -25,6 +29,9 @@ done
|
||||
|
||||
cd "$ROOT" || exit
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Checking accuracy test for $model"
|
||||
python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/inference_"$model".csv
|
||||
python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/training_"$model".csv
|
||||
69
.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh
vendored
Executable file
69
.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh
vendored
Executable file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
INDUCTOR="$ROOT"/.github/workflows/torch-inductor
|
||||
MODEL_SPEC=$1
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source "$INDUCTOR"/scripts/common.sh
|
||||
|
||||
# lock GPU clocks to 1350 MHz
|
||||
sudo nvidia-smi -i 0 -pm 1
|
||||
sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
|
||||
|
||||
cd "$PYTORCH_DIR" || exit
|
||||
TRITON_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/perf
|
||||
BASE_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
|
||||
mkdir -p "$TRITON_TEST_REPORTS_DIR"
|
||||
mkdir -p "$BASE_TEST_REPORTS_DIR"
|
||||
|
||||
|
||||
echo "Running with Triton Nightly"
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Running performance test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --float32 -dcuda --training --inductor --performance \
|
||||
--output "$TRITON_TEST_REPORTS_DIR"/"$model".csv
|
||||
done
|
||||
|
||||
# install pytorch-triton
|
||||
pip3 uninstall triton -y
|
||||
pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu121
|
||||
|
||||
echo "Running with pytorch-triton"
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Running performance test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --float32 -dcuda --training --inductor --performance \
|
||||
--output "$BASE_TEST_REPORTS_DIR"/"$model".csv
|
||||
done
|
||||
|
||||
# uninstall pytorch-triton
|
||||
pip3 uninstall pytorch-triton -y
|
||||
|
||||
cd "$ROOT" || exit
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Checking performance test for $model"
|
||||
python3 "$INDUCTOR"/scripts/check_perf.py --new "$TRITON_TEST_REPORTS_DIR"/"$model".csv --baseline "$BASE_TEST_REPORTS_DIR"/"$model".csv
|
||||
EXIT_STATUS=$?
|
||||
if [ "$EXIT_STATUS" -ne 0 ]; then
|
||||
echo "Performance test for $model failed"
|
||||
exit "$EXIT_STATUS"
|
||||
fi
|
||||
done
|
||||
|
||||
# unlock GPU clocks
|
||||
sudo nvidia-smi -i 0 -rgc
|
||||
|
||||
# go back to where we started
|
||||
cd "$ROOT" || exit
|
||||
@@ -1,37 +0,0 @@
|
||||
dev,name,batch_size,speedup,abs_latency,compilation_latency,compression_ratio
|
||||
cuda,AlbertForMaskedLM,4,1.5511,164.3373,26.8523,1.2647
|
||||
cuda,AlbertForQuestionAnswering,4,1.5501,163.5580,25.7983,1.3145
|
||||
cuda,BartForCausalLM,4,1.5080,71.7230,32.8907,0.9749
|
||||
cuda,BertForMaskedLM,16,1.5350,67.9451,35.3286,1.0494
|
||||
cuda,BertForQuestionAnswering,16,1.6735,53.2963,34.3754,1.1710
|
||||
cuda,BlenderbotSmallForCausalLM,64,1.2106,46.6466,23.8058,0.9120
|
||||
cuda,BlenderbotSmallForConditionalGeneration,64,1.3616,77.3013,55.3546,0.9803
|
||||
cuda,CamemBert,16,1.4779,76.1809,35.3883,1.0469
|
||||
cuda,DebertaForMaskedLM,4,0.8415,62.3395,35.9657,1.0418
|
||||
cuda,DebertaForQuestionAnswering,8,1.0609,67.5151,35.7728,1.1528
|
||||
cuda,DebertaV2ForMaskedLM,1,0.6026,134.6517,66.1783,0.9773
|
||||
cuda,DistilBertForMaskedLM,128,1.2460,66.9382,18.3089,0.9624
|
||||
cuda,DistilBertForQuestionAnswering,256,1.3997,72.4126,18.1956,1.1486
|
||||
cuda,DistillGPT2,16,1.6656,60.5455,17.2280,1.0641
|
||||
cuda,ElectraForCausalLM,32,1.8299,45.4841,37.0944,0.9717
|
||||
cuda,ElectraForQuestionAnswering,64,2.0289,52.6890,35.9632,1.1928
|
||||
cuda,GPT2ForSequenceClassification,4,2.2567,38.2969,30.0527,1.2323
|
||||
cuda,LayoutLMForMaskedLM,16,1.5423,68.8018,36.5562,1.0495
|
||||
cuda,LayoutLMForSequenceClassification,16,1.7058,53.9355,35.2225,1.1659
|
||||
cuda,MBartForCausalLM,4,1.4945,71.4649,32.8653,0.9830
|
||||
cuda,MegatronBertForCausalLM,4,1.4328,58.4404,70.6226,1.0951
|
||||
cuda,MegatronBertForQuestionAnswering,8,1.5886,85.2533,69.1219,1.1152
|
||||
cuda,MobileBertForMaskedLM,64,0.9007,131.7379,107.5275,1.0136
|
||||
cuda,MobileBertForQuestionAnswering,128,0.8435,167.9066,106.7049,0.8579
|
||||
cuda,PLBartForCausalLM,8,1.5261,68.9224,19.5826,0.9887
|
||||
cuda,PLBartForConditionalGeneration,4,1.5298,71.2811,45.6902,1.0495
|
||||
cuda,PegasusForCausalLM,32,1.2212,57.5436,33.3863,0.9736
|
||||
cuda,PegasusForConditionalGeneration,32,1.2822,106.4678,69.8825,1.0689
|
||||
cuda,RobertaForCausalLM,16,1.6128,67.5706,34.7355,1.0496
|
||||
cuda,RobertaForQuestionAnswering,16,1.6800,53.6267,33.8527,1.1704
|
||||
cuda,Speech2Text2ForCausalLM,256,1.8230,32.9145,18.7201,0.8760
|
||||
cuda,T5ForConditionalGeneration,4,1.6592,59.5324,39.4406,1.1814
|
||||
cuda,T5Small,4,1.6581,59.5930,37.0471,1.1814
|
||||
cuda,TrOCRForCausalLM,32,1.2586,106.2633,32.5330,0.9583
|
||||
cuda,XLNetLMHeadModel,8,1.8108,142.8795,84.8197,1.1240
|
||||
cuda,YituTechConvBert,16,1.5207,81.4595,53.1565,1.0362
|
||||
|
@@ -1,54 +0,0 @@
|
||||
dev,name,batch_size,speedup,abs_latency,compilation_latency,compression_ratio
|
||||
cuda,adv_inception_v3,128,1.5923,102.5292,51.6032,1.0472
|
||||
cuda,beit_base_patch16_224,64,1.3390,75.3027,29.7471,1.0156
|
||||
cuda,coat_lite_mini,128,2.0579,53.3689,37.1856,1.0437
|
||||
cuda,convmixer_768_32,32,1.0470,275.5328,23.8037,0.9999
|
||||
cuda,convnext_base,64,1.5084,80.1811,42.5659,1.0373
|
||||
cuda,crossvit_9_240,128,1.5392,37.1806,44.9986,0.9193
|
||||
cuda,cspdarknet53,64,1.4721,75.0403,35.2882,1.0547
|
||||
cuda,deit_base_distilled_patch16_224,64,1.1432,55.9737,23.4038,0.9816
|
||||
cuda,dla102,128,1.5282,123.7284,49.3612,1.0430
|
||||
cuda,dm_nfnet_f0,128,1.4354,79.7518,34.8994,1.1038
|
||||
cuda,dpn107,32,1.2412,83.8921,58.9111,0.9952
|
||||
cuda,eca_botnext26ts_256,128,1.5425,71.2406,28.8920,1.0270
|
||||
cuda,ese_vovnet19b_dw,128,1.4647,42.4837,18.0285,1.0135
|
||||
cuda,fbnetc_100,128,1.5795,53.8033,33.0222,1.0082
|
||||
cuda,gernet_l,128,1.1684,63.4230,26.8687,1.0053
|
||||
cuda,ghostnet_100,128,1.7812,54.4211,47.6168,1.0484
|
||||
cuda,gluon_inception_v3,128,1.5952,102.5018,50.0857,1.0469
|
||||
cuda,gmixer_24_224,128,1.6749,69.2430,42.0841,1.1921
|
||||
cuda,gmlp_s16_224,128,1.5886,79.2132,43.0142,1.2343
|
||||
cuda,hrnet_w18,128,1.3743,221.5304,134.2573,1.0100
|
||||
cuda,inception_v3,128,1.5847,102.8333,49.7648,1.0472
|
||||
cuda,jx_nest_base,32,1.3747,71.4190,61.4053,0.9905
|
||||
cuda,lcnet_050,128,1.8159,18.0047,18.8249,1.0005
|
||||
cuda,mixer_b16_224,128,1.2795,90.9229,21.0438,1.0133
|
||||
cuda,mixnet_l,128,1.2273,149.9722,47.7482,1.0129
|
||||
cuda,mnasnet_100,128,1.6594,40.0512,26.5165,1.0047
|
||||
cuda,mobilenetv2_100,128,1.6085,41.1217,27.4450,1.1731
|
||||
cuda,mobilenetv3_large_100,128,1.6610,37.9995,29.8185,1.0052
|
||||
cuda,mobilevit_s,64,1.5212,55.4152,53.6475,1.0258
|
||||
cuda,nfnet_l0,128,1.4927,65.7078,32.4067,0.9980
|
||||
cuda,pit_b_224,64,1.2286,57.9484,26.5321,0.9606
|
||||
cuda,pnasnet5large,16,1.0000,198.2494,93.4641,1.3184
|
||||
cuda,poolformer_m36,64,1.3486,103.9235,62.3196,1.1942
|
||||
cuda,regnety_002,128,1.3030,32.4968,27.2439,1.0014
|
||||
cuda,repvgg_a2,128,1.2485,59.7729,26.9209,1.0185
|
||||
cuda,res2net101_26w_4s,64,1.0813,94.1773,86.6520,0.9655
|
||||
cuda,res2net50_14w_8s,128,1.3251,109.5258,79.9578,0.9830
|
||||
cuda,res2next50,128,1.2518,125.5008,43.9754,0.9756
|
||||
cuda,resmlp_12_224,128,1.3060,45.2373,19.3709,1.1048
|
||||
cuda,resnest101e,64,1.4346,108.1945,78.1993,1.1037
|
||||
cuda,rexnet_100,128,1.4637,55.0121,41.2075,1.0862
|
||||
cuda,selecsls42b,128,1.4284,44.6645,23.3892,1.0139
|
||||
cuda,spnasnet_100,128,1.5908,45.3189,32.0148,1.0048
|
||||
cuda,swin_base_patch4_window7_224,64,1.6164,89.5854,75.5848,0.9299
|
||||
cuda,swsl_resnext101_32x16d,32,1.0175,110.0041,45.7853,1.0003
|
||||
cuda,tf_efficientnet_b0,128,1.5271,55.7361,34.5551,1.1079
|
||||
cuda,tf_mixnet_l,128,1.2369,155.9027,48.6695,1.0921
|
||||
cuda,tinynet_a,128,1.3792,53.0640,40.6346,1.1108
|
||||
cuda,tnt_s_patch16_224,128,3.1078,104.8486,59.6028,1.0660
|
||||
cuda,twins_pcpvt_base,64,1.5921,67.4600,84.4977,1.0909
|
||||
cuda,visformer_small,128,1.1952,72.8705,23.7303,1.0410
|
||||
cuda,vit_base_patch16_224,64,1.1309,56.4866,22.0208,0.9804
|
||||
cuda,volo_d1_224,64,1.6868,72.0957,65.3011,0.9729
|
||||
|
@@ -1,53 +0,0 @@
|
||||
dev,name,batch_size,speedup,abs_latency,compilation_latency,compression_ratio
|
||||
cuda,BERT_pytorch,16,1.7111,24.2741,35.7065,1.3212
|
||||
cuda,LearningToPaint,96,1.0513,10.7557,11.1879,0.9896
|
||||
cuda,Super_SloMo,6,1.3267,60.4328,28.2097,1.2392
|
||||
cuda,alexnet,128,1.1754,8.3246,5.3319,1.0003
|
||||
cuda,attention_is_all_you_need_pytorch,256,1.3416,36.4401,39.5927,1.1774
|
||||
cuda,dcgan,32,0.9151,2.6249,3.2964,1.0082
|
||||
cuda,densenet121,4,0.9225,51.3747,68.5841,0.9930
|
||||
cuda,doctr_det_predictor,0,0.0000
|
||||
cuda,doctr_reco_predictor,0,0.0000
|
||||
cuda,drq,1,0.9500,3.4884,4.8028,0.9687
|
||||
cuda,fastNLP_Bert,6,1.4328,34.7753,35.4863,1.2368
|
||||
cuda,functorch_dp_cifar10,64,1.2015,8.1625,12.9040,1.0609
|
||||
cuda,functorch_maml_omniglot,1,0.9322,2.5844,3.8640,1.0000
|
||||
cuda,hf_Albert,8,2.1228,30.3377,26.8282,1.2676
|
||||
cuda,hf_Bart,4,1.2899,39.1935,47.2373,1.0080
|
||||
cuda,hf_Bert,4,1.3262,26.1063,35.0281,1.0656
|
||||
cuda,hf_Bert_large,4,1.4163,55.1021,67.2825,1.0915
|
||||
cuda,hf_DistilBert,8,1.4051,21.7191,18.0399,1.0242
|
||||
cuda,hf_GPT2,4,1.6661,26.9039,29.9473,1.1555
|
||||
cuda,hf_Longformer,0,0.0000
|
||||
cuda,hf_Reformer,4,1.1709,64.6979,15.7035,0.9267
|
||||
cuda,hf_T5_large,2,1.7215,107.0798,148.8805,1.1684
|
||||
cuda,lennard_jones,1000,0.8428,1.8488,3.0609,1.0001
|
||||
cuda,maml_omniglot,32,0.9648,2.6869,3.9775,0.9999
|
||||
cuda,mnasnet1_0,32,1.0469,21.6251,25.8232,0.9996
|
||||
cuda,mobilenet_v2,96,1.5604,31.9572,27.0225,1.1734
|
||||
cuda,nvidia_deeprecommender,256,1.0605,9.2080,4.1318,0.9711
|
||||
cuda,phlippe_densenet,128,1.0237,27.5988,28.0400,1.0023
|
||||
cuda,phlippe_resnet,128,1.0493,10.9751,10.2485,1.0092
|
||||
cuda,pytorch_CycleGAN_and_pix2pix,1,1.3724,8.2225,11.9561,1.0219
|
||||
cuda,pytorch_stargan,16,1.1835,11.9178,10.0507,1.0868
|
||||
cuda,pytorch_unet,1,1.3787,29.7543,13.7711,1.0100
|
||||
cuda,resnet152,32,0.9834,63.2446,67.7935,0.9991
|
||||
cuda,resnet18,16,0.9451,9.4977,11.7663,0.9948
|
||||
cuda,resnet50,32,1.0513,24.5141,24.6629,1.0021
|
||||
cuda,resnext50_32x4d,8,0.9216,22.2460,24.3420,0.9984
|
||||
cuda,shufflenet_v2_x1_0,128,1.1943,25.4520,28.8611,1.0951
|
||||
cuda,soft_actor_critic,256,0.8691,1.9637,3.3716,0.9996
|
||||
cuda,speech_transformer,32,1.2718,35.2922,46.9957,1.0897
|
||||
cuda,squeezenet1_1,32,1.1302,8.4540,7.9625,1.0771
|
||||
cuda,timm_efficientdet,1,1.3370,80.0377,120.1814,1.2713
|
||||
cuda,timm_efficientnet,32,1.1874,27.6302,33.9059,1.0971
|
||||
cuda,timm_nfnet,128,1.4525,77.3461,34.3270,1.1056
|
||||
cuda,timm_regnet,32,1.0644,50.6953,35.7562,1.0000
|
||||
cuda,timm_resnest,32,1.6200,14.7763,17.2245,1.0906
|
||||
cuda,timm_vision_transformer,32,1.0800,19.4188,22.0255,0.9966
|
||||
cuda,timm_vision_transformer_large,32,1.0081,393.1742,127.8083,0.9735
|
||||
cuda,timm_vovnet,32,1.1472,22.4727,22.7328,1.0120
|
||||
cuda,torchrec_dlrm,0,0.0000
|
||||
cuda,tts_angular,64,0.8974,6.5057,2.5555,0.9973
|
||||
cuda,vgg16,64,1.2909,50.7405,6.1510,0.9828
|
||||
cuda,yolov3,16,1.2930,54.8069,41.9269,1.0563
|
||||
|
@@ -1,54 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
|
||||
# torchinductor venv
|
||||
whoami
|
||||
python3 -m venv /opt/torchinductor_venv
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source ./.github/workflows/torchinductor/scripts/common.sh
|
||||
|
||||
# pytorch nightly
|
||||
pip3 install --force-reinstall --pre torch torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu118
|
||||
# pytorch source to get torchbench for dynamo
|
||||
cd /opt || exit
|
||||
git clone --recursive https://github.com/pytorch/pytorch
|
||||
cd pytorch || exit
|
||||
# if you are updating an existing checkout
|
||||
git submodule sync
|
||||
git submodule update --init --recursive
|
||||
cd ..
|
||||
|
||||
# required packages
|
||||
pip3 install expecttest psutil
|
||||
|
||||
# torchbench
|
||||
pip3 install pyyaml
|
||||
git clone https://github.com/pytorch/benchmark.git
|
||||
cd benchmark || exit
|
||||
python3 install.py
|
||||
cd ..
|
||||
|
||||
# timm
|
||||
git clone https://github.com/huggingface/pytorch-image-models.git
|
||||
cd pytorch-image-models || exit
|
||||
pip3 install -e .
|
||||
cd ..
|
||||
|
||||
# build our own triton
|
||||
cd "$ROOT" || exit
|
||||
cd python || exit
|
||||
rm -rf build
|
||||
pip3 install -e .
|
||||
pip3 uninstall pytorch-triton -y
|
||||
|
||||
# clean up cache
|
||||
rm -rf /tmp/torchinductor_root/
|
||||
rm -rf ~/.triton/cache
|
||||
rm -rf "$TEST_REPORTS_DIR"
|
||||
|
||||
# go back to where we started
|
||||
cd "$ROOT" || exit
|
||||
@@ -1,41 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
INDUCTOR="$ROOT"/.github/workflows/torchinductor
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source "$INDUCTOR"/scripts/common.sh
|
||||
|
||||
# lock GPU clocks to 1350 MHz
|
||||
sudo nvidia-smi -i 0 -pm 1
|
||||
sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
|
||||
|
||||
cd "$PYTORCH_DIR" || exit
|
||||
TEST_REPORTS_DIR=$TEST_REPORTS_DIR/perf
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
for model in "${MODELS[@]}"; do
|
||||
echo "Running performance test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --ci --training --performance --disable-cudagraphs\
|
||||
--device cuda --inductor --amp --output "$TEST_REPORTS_DIR"/"$model".csv
|
||||
done
|
||||
|
||||
cd "$ROOT" || exit
|
||||
for model in "${MODELS[@]}"; do
|
||||
echo "Checking performance test for $model"
|
||||
python3 "$INDUCTOR"/scripts/check_perf.py --new "$TEST_REPORTS_DIR"/"$model".csv --baseline "$INDUCTOR"/data/"$model".csv
|
||||
EXIT_STATUS=$?
|
||||
if [ "$EXIT_STATUS" -ne 0 ]; then
|
||||
echo "Performance test for $model failed"
|
||||
exit "$EXIT_STATUS"
|
||||
fi
|
||||
done
|
||||
|
||||
# unlock GPU clocks
|
||||
sudo nvidia-smi -i 0 -rgc
|
||||
|
||||
# go back to where we started
|
||||
cd "$ROOT" || exit
|
||||
2
.github/workflows/wheels.yml
vendored
2
.github/workflows/wheels.yml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
|
||||
Build-Wheels:
|
||||
|
||||
runs-on: [self-hosted, V100]
|
||||
runs-on: [self-hosted, CPU]
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
Reference in New Issue
Block a user