mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-10 07:08:08 -05:00
External CI: Add torchvision to nightly pytorch job (#3684)
- Added steps for creating wheel file for torchvision. - Tried to add torchaudio as well, but it was not reading in AMDGPU_TARGETS value in the nested cmake calls from the python setup.py execution. - Upstream pytorch builder scripts were updated, so it broke the patching step in the job. Removed the need to patch by using a flag to skip the tests. - Will work on adding smoke tests of pytorch and torchvision later, just getting this out to fix the nightly build.
This commit is contained in:
@@ -54,6 +54,14 @@ parameters:
|
||||
- libdrm-amdgpu1
|
||||
- ccache
|
||||
- zip
|
||||
- libjpeg-turbo-official
|
||||
- libjpeg-dev
|
||||
- libwebp-dev
|
||||
- libfreetype-dev
|
||||
- gnutls-bin
|
||||
- ffmpeg
|
||||
- libopenblas-dev
|
||||
- liblapack-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -76,6 +84,9 @@ parameters:
|
||||
- ninja
|
||||
- packaging
|
||||
- optree>=0.12.0
|
||||
# list for vision
|
||||
- auditwheel
|
||||
- future
|
||||
# list from https://github.com/pytorch/builder/blob/main/manywheel/build_rocm.sh
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -155,6 +166,16 @@ jobs:
|
||||
# copy environment setup from https://github.com/pytorch/builder/blob/main/manywheel/Dockerfile
|
||||
# but instead of centos, use ubuntu environment
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- task: Bash@3
|
||||
displayName: 'Register libjpeg-turbo packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget -q -O- https://packagecloud.io/dcommander/libjpeg-turbo/gpgkey | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/libjpeg-turbo.gpg > /dev/null
|
||||
echo "deb [signed-by=/etc/apt/trusted.gpg.d/libjpeg-turbo.gpg] https://packagecloud.io/dcommander/libjpeg-turbo/any/ any main" | sudo tee /etc/apt/sources.list.d/libjpeg-turbo.list
|
||||
sudo apt update
|
||||
apt-cache show libjpeg-turbo-official | grep Version
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
@@ -191,12 +212,6 @@ jobs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/pytorch.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Patch out forced GPU testing block in pytorch build script
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git apply $(Build.SourcesDirectory)/.azuredevops/patches/pytorch/0001-ROCm-CI-patches.patch
|
||||
workingDirectory: $(Build.SourcesDirectory)/builder
|
||||
- task: Bash@3
|
||||
displayName: Install patchelf
|
||||
inputs:
|
||||
@@ -229,8 +244,7 @@ jobs:
|
||||
displayName: Install AOTriton Shared Library
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash ./common/install_aotriton.sh /opt/rocm
|
||||
script: sudo bash ./common/install_aotriton.sh /opt/rocm
|
||||
workingDirectory: $(Build.SourcesDirectory)/pytorch/.ci/docker
|
||||
- task: Bash@3
|
||||
displayName: Run ROCm Build Script
|
||||
@@ -249,10 +263,59 @@ jobs:
|
||||
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
PYTORCH_BUILD_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)
|
||||
PYTORCH_BUILD_NUMBER=$(date -u +%Y%m%d)
|
||||
SKIP_ALL_TESTS=1
|
||||
bash ./manywheel/build_rocm.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)/builder
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: /remote/wheelhouserocm$(ROCM_VERSION)
|
||||
contentsString: '*.whl'
|
||||
# common helper source for pytorch vision and audio
|
||||
- task: Bash@3
|
||||
displayName: git clone pytorch test-infra
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/test-infra.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: install package helper
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 -m pip install test-infra/tools/pkg-helpers
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: pytorch pkg helpers
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: CU_VERSION=${CU_VERSION} CHANNEL=${CHANNEL} python -m pytorch_pkg_helpers
|
||||
# get torch vision source and build
|
||||
- task: Bash@3
|
||||
displayName: git clone pytorch vision
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/vision.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Build vision
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
BUILD_VERSION=$(ROCM_BRANCH).$(JOB_GPU_TARGET)-$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)$(date -u +%Y%m%d)
|
||||
python3 setup.py bdist_wheel
|
||||
workingDirectory: $(Build.SourcesDirectory)/vision
|
||||
- task: Bash@3
|
||||
displayName: Relocate vision
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 packaging/wheel/relocate.py
|
||||
workingDirectory: $(Build.SourcesDirectory)/vision
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Build.SourcesDirectory)/vision/dist
|
||||
contentsString: '*.whl'
|
||||
clean: false
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'ROCm pytorch wheel file Publish'
|
||||
displayName: 'wheel file Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetPath: /remote/wheelhouserocm$(ROCM_VERSION)
|
||||
targetPath: $(Build.BinariesDirectory)
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
From b2d3c88f7a8b179e814e72f76f27e25c82659200 Mon Sep 17 00:00:00 2001
|
||||
From: Joseph Macaranas <Joseph.Macaranas@amd.com>
|
||||
Date: Tue, 30 Jul 2024 05:43:06 -0400
|
||||
Subject: [PATCH] ROCm CI patches
|
||||
|
||||
---
|
||||
manywheel/build_common.sh | 9 ---------
|
||||
1 file changed, 9 deletions(-)
|
||||
|
||||
diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh
|
||||
index 08ca924..52c468f 100644
|
||||
--- a/manywheel/build_common.sh
|
||||
+++ b/manywheel/build_common.sh
|
||||
@@ -475,11 +475,9 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
fi
|
||||
|
||||
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
||||
-
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
fi
|
||||
-
|
||||
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
|
||||
# Print info on the libraries installed in this wheel
|
||||
@@ -491,11 +489,4 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
ldd "$installed_lib" || true
|
||||
done
|
||||
|
||||
- # Run the tests
|
||||
- echo "$(date) :: Running tests"
|
||||
- pushd "$PYTORCH_ROOT"
|
||||
- LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
|
||||
- "${SOURCE_DIR}/../run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
|
||||
- popd
|
||||
- echo "$(date) :: Finished tests"
|
||||
fi
|
||||
--
|
||||
2.44.0.windows.1
|
||||
|
||||
Reference in New Issue
Block a user