mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 14:48:06 -05:00
External CI Build Job for Pytorch WHL (#3476)
* External CI Build Job for Pytorch WHL * Upgrading build system for pytorch job
This commit is contained in:
252
.azuredevops/nightly/pytorch.yml
Normal file
252
.azuredevops/nightly/pytorch.yml
Normal file
@@ -0,0 +1,252 @@
|
||||
parameters:
|
||||
# ubuntu near-equivalent list of yum installs in https://github.com/ROCm/ROCm-docker/blob/master/dev/Dockerfile-centos-7-complete
|
||||
# plus additional packages found through iterative testing of pipeline
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- build-essential
|
||||
- git
|
||||
- ninja-build
|
||||
- openjdk-8-jdk
|
||||
- ca-certificates
|
||||
- bc
|
||||
- bridge-utils
|
||||
- cmake
|
||||
- devscripts
|
||||
- dkms
|
||||
- doxygen
|
||||
- libdpkg-dev
|
||||
- libdpkg-perl
|
||||
- libelf-dev
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- wget
|
||||
- ncurses-base
|
||||
- libncurses-dev
|
||||
- numactl
|
||||
- libnuma-dev
|
||||
- libssh-dev
|
||||
- libunwind-dev
|
||||
- llvm-dev
|
||||
- libpth-dev
|
||||
- qemu-kvm
|
||||
- re2c
|
||||
- subversion
|
||||
- fakeroot
|
||||
- autoconf
|
||||
- libgomp1
|
||||
- libtinfo-dev
|
||||
- libcholmod3
|
||||
- libsuitesparseconfig5
|
||||
- libstdc++-12-dev
|
||||
- python-is-python3
|
||||
- gfortran
|
||||
- libgfortran5
|
||||
- liblapack3
|
||||
- libblas3
|
||||
- libquadmath0
|
||||
- libmetis5
|
||||
- libamd2
|
||||
- libcamd2
|
||||
- libcolamd2
|
||||
- libccolamd2
|
||||
- libdrm-amdgpu1
|
||||
- ccache
|
||||
- zip
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- astunparse
|
||||
- expecttest!=0.2.0
|
||||
- hypothesis
|
||||
- numpy
|
||||
- psutil
|
||||
- pyyaml
|
||||
- requests
|
||||
- setuptools
|
||||
- types-dataclasses
|
||||
- typing-extensions>=4.8.0
|
||||
- sympy>=1.13.0
|
||||
- filelock
|
||||
- networkx
|
||||
- jinja2
|
||||
- fsspec
|
||||
- lintrunner
|
||||
- ninja
|
||||
- packaging
|
||||
- optree>=0.12.0
|
||||
# list from https://github.com/pytorch/builder/blob/main/manywheel/build_rocm.sh
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocminfo
|
||||
- MIOpen
|
||||
- clr
|
||||
- hipBLAS
|
||||
- hipFFT
|
||||
- hipRAND
|
||||
- hipSOLVER
|
||||
- hipSPARSE
|
||||
- ROCR-Runtime
|
||||
- llvm-project
|
||||
- rccl
|
||||
- rocBLAS
|
||||
- rocFFT
|
||||
- rocm_smi_lib
|
||||
- rocRAND
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
- roctracer
|
||||
- hipBLASLt
|
||||
- rocprofiler-register
|
||||
- rocm-core
|
||||
- rocPRIM
|
||||
# below are additional dependencies not called out by build script, but throw errors during cmake
|
||||
- hipCUB
|
||||
- rocThrust
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
schedules:
|
||||
- cron: '30 7 * * *'
|
||||
displayName: nightly pytorch
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
always: true
|
||||
|
||||
jobs:
|
||||
- job: pytorch
|
||||
timeoutInMinutes: 120
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
# various flags/parameters expected by bash scripts in pytorch builder repo
|
||||
- name: ROCM_VERSION
|
||||
value: 6.3.0
|
||||
- name: PYTORCH_ROCM_ARCH
|
||||
value: gfx942
|
||||
- name: GPU_TARGET
|
||||
value: gfx942
|
||||
- name: ROCM_PATH
|
||||
value: /opt/rocm
|
||||
- name: DESIRED_CUDA
|
||||
value: 6.3.0
|
||||
- name: MKLROOT
|
||||
value: /opt/intel
|
||||
- name: AOTRITON_INSTALLED_PREFIX
|
||||
value: /opt/rocm/aotriton
|
||||
- name: DESIRED_PYTHON
|
||||
value: 3.10
|
||||
- name: PYTORCH_ROOT
|
||||
value: $(Build.SourcesDirectory)/pytorch
|
||||
- name: CMAKE_ARGS
|
||||
value: -GNinja
|
||||
- name: DESIRED_DEVTOOLSET
|
||||
value: cxx11-abi
|
||||
pool: ${{ variables.ULTRA_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
# copy environment setup from https://github.com/pytorch/builder/blob/main/manywheel/Dockerfile
|
||||
# but instead of centos, use ubuntu environment
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# wheel install location different on azure agent compared to where wheel is assumed to be installed on upstream script
|
||||
- task: Bash@3
|
||||
displayName: wheel install path symlink
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir -p /opt/python/cp310-cp310/lib/python3.10
|
||||
sudo ln -s /usr/local/lib/python3.10/dist-packages /opt/python/cp310-cp310/lib/python3.10/site-packages
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
- task: Bash@3
|
||||
displayName: ROCm symbolic links
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- checkout: self
|
||||
- task: Bash@3
|
||||
displayName: git clone pytorch builder
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: git clone upstream pytorch
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/pytorch.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Patch out forced GPU testing block in pytorch build script
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git apply $(Build.SourcesDirectory)/.azuredevops/patches/pytorch/0001-ROCm-CI-patches.patch
|
||||
workingDirectory: $(Build.SourcesDirectory)/builder
|
||||
- task: Bash@3
|
||||
displayName: Install patchelf
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_patchelf.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install mkl dependency for magma
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_mkl.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install rocm drm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_rocm_drm.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install rocm magma
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo PYTORCH_ROCM_ARCH=$(PYTORCH_ROCM_ARCH) MKLROOT=$(MKLROOT) bash pytorch/.ci/docker/common/install_rocm_magma.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install AOTriton Shared Library
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash ./install_aotriton.sh /opt/rocm
|
||||
workingDirectory: $(Build.SourcesDirectory)/pytorch/.ci/docker/common
|
||||
- task: Bash@3
|
||||
displayName: Run ROCm Build Script
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
sudo
|
||||
DESIRED_CUDA=$(DESIRED_CUDA)
|
||||
PYTORCH_ROCM_ARCH=$(PYTORCH_ROCM_ARCH)
|
||||
DESIRED_PYTHON=$(DESIRED_PYTHON)
|
||||
PYTORCH_ROOT=$(PYTORCH_ROOT)
|
||||
CMAKE_ARGS=$(CMAKE_ARGS)
|
||||
AOTRITON_INSTALLED_PREFIX=$(AOTRITON_INSTALLED_PREFIX)
|
||||
DESIRED_DEVTOOLSET=$(DESIRED_DEVTOOLSET)
|
||||
bash ./manywheel/build_rocm.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)/builder
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: /remote
|
||||
contentsString: 'wheelhouserocm*/torch*.whl'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
40
.azuredevops/patches/pytorch/0001-ROCm-CI-patches.patch
Normal file
40
.azuredevops/patches/pytorch/0001-ROCm-CI-patches.patch
Normal file
@@ -0,0 +1,40 @@
|
||||
From b2d3c88f7a8b179e814e72f76f27e25c82659200 Mon Sep 17 00:00:00 2001
|
||||
From: Joseph Macaranas <Joseph.Macaranas@amd.com>
|
||||
Date: Tue, 30 Jul 2024 05:43:06 -0400
|
||||
Subject: [PATCH] ROCm CI patches
|
||||
|
||||
---
|
||||
manywheel/build_common.sh | 9 ---------
|
||||
1 file changed, 9 deletions(-)
|
||||
|
||||
diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh
|
||||
index 08ca924..52c468f 100644
|
||||
--- a/manywheel/build_common.sh
|
||||
+++ b/manywheel/build_common.sh
|
||||
@@ -475,11 +475,9 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
fi
|
||||
|
||||
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
||||
-
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
fi
|
||||
-
|
||||
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
|
||||
# Print info on the libraries installed in this wheel
|
||||
@@ -491,11 +489,4 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
ldd "$installed_lib" || true
|
||||
done
|
||||
|
||||
- # Run the tests
|
||||
- echo "$(date) :: Running tests"
|
||||
- pushd "$PYTORCH_ROOT"
|
||||
- LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
|
||||
- "${SOURCE_DIR}/../run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
|
||||
- popd
|
||||
- echo "$(date) :: Finished tests"
|
||||
fi
|
||||
--
|
||||
2.44.0.windows.1
|
||||
|
||||
@@ -12,7 +12,7 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 --version
|
||||
- script: pip list
|
||||
- script: pip list -v
|
||||
displayName: 'list python packages'
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Cleanup checkout space'
|
||||
|
||||
Reference in New Issue
Block a user