Compare commits

...

4 Commits

Author SHA1 Message Date
Joseph Macaranas
7dba5d24da [External CI] Fix pytorch nightly build errors
- Add hipSPARSELt dependency.
- Add hipBLASLt test dependency for rocroller shared library.
- Update pip dependency versions.
- Install an additional copy of typing_extensions into a specific folder so that one of the builds we do not control works.
- Wheel renaming no longer works, so we need to find another mechanism if we start doing builds for different branches and gfx architectures.
2025-06-08 15:04:12 -04:00
Daniel Su
f0bef19f15 Ex CI: roc/hipFFT downstream builds (#4840) 2025-06-06 15:20:46 -04:00
Ian Dass
204032493b [External CI] Ubuntu 24.04 job for llvm-project (#4893)
* [External CI] Ubuntu 24.04 job for llvm-project

* temporarily switch to using 'high' build pool while 'ultra' is down

* switch almalinux8 to build on manylinux container

* add pool for alma8 container

* switch alma8 package manager to apt

* Update llvm-project.yml

* switch back to dnf after resolved container init

---------

Co-authored-by: Joseph Macaranas <Joseph.Macaranas@amd.com>
2025-06-06 14:46:30 -04:00
Joseph Macaranas
934dd0892c [External CI] Unblock rocm-libraries progress by not building gfx11 rocprim (#4894) 2025-06-06 14:41:50 -04:00
6 changed files with 187 additions and 78 deletions

View File

@@ -1,10 +1,29 @@
parameters:
- name: componentName
type: string
default: hipFFT
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -61,7 +80,9 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: hipFFT_build_${{ job.target }}
- job: ${{ parameters.componentName }}_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn: ${{ parameters.buildDependsOn[job.target] }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -79,12 +100,15 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
@@ -102,9 +126,11 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -113,8 +139,8 @@ jobs:
# gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: hipFFT_test_${{ job.target }}
dependsOn: hipFFT_build_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -134,6 +160,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
@@ -141,10 +168,12 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: hipFFT
componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipfft-test'
testParameters: '--test_prob 0.002 --gtest_output=xml:./test_output.xml --gtest_color=yes'

View File

@@ -68,6 +68,18 @@ parameters:
target: gfx942
- gfx90a:
target: gfx90a
- name: downstreamComponentMatrix
type: object
default:
- rocFFT:
name: rocFFT
sparseCheckoutDir: projects/rocfft
skipUnifiedBuild: 'false'
buildDependsOn:
gfx942:
- hipRAND_build_gfx942
gfx90a:
- hipRAND_build_gfx90a
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -172,3 +184,15 @@ jobs:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -30,13 +30,17 @@ parameters:
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt }
- { os: ubuntu2404, packageManager: apt }
- { os: almalinux8, packageManager: dnf }
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: llvm_project_${{ job.os }}
pool:
name: 'rocm-ci_ultra_build_pool'
${{ if eq(job.os, 'ubuntu2404') }}:
name: 'rocm-ci_high_build_pool_2404' #temporarily using 'high' pool while 'ultra' is down
${{ else }}:
name: 'rocm-ci_ultra_build_pool'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest

View File

@@ -1,10 +1,29 @@
parameters:
- name: componentName
type: string
default: rocFFT
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -59,10 +78,24 @@ parameters:
target: gfx942
- gfx90a:
target: gfx90a
- name: downstreamComponentMatrix
type: object
default:
- hipFFT:
name: hipFFT
sparseCheckoutDir: projects/hipfft
skipUnifiedBuild: 'false'
buildDependsOn:
gfx942:
- rocFFT_build_gfx942
gfx90a:
- rocFFT_build_gfx90a
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocFFT_build_${{ job.target }}
- job: ${{ parameters.componentName }}_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn: ${{ parameters.buildDependsOn[job.target] }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -79,12 +112,15 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
@@ -101,9 +137,11 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -114,8 +152,8 @@ jobs:
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: rocFFT_test_${{ job.target }}
dependsOn: rocFFT_build_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -135,6 +173,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
@@ -142,10 +181,12 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocFFT
componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './rocfft-test'
testParameters: '--test_prob 0.004 --gtest_output=xml:./test_output.xml --gtest_color=yes'
@@ -154,3 +195,15 @@ jobs:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -60,12 +60,12 @@ parameters:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { os: almalinux8, packageManager: dnf, target: gfx942 }
- { os: almalinux8, packageManager: dnf, target: gfx90a }
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }

View File

@@ -4,71 +4,71 @@ parameters:
- name: aptPackages
type: object
default:
- build-essential
- git
- ninja-build
- openjdk-8-jdk
- ca-certificates
- autoconf
- bc
- bridge-utils
- build-essential
- ca-certificates
- ccache
- devscripts
- dkms
- doxygen
- fakeroot
- ffmpeg
- gfortran
- git
- gnutls-bin
- libamd2
- libavformat-dev
- libblas3
- libcamd2
- libccolamd2
- libcholmod3
- libcolamd2
- libdpkg-dev
- libdpkg-perl
- libdrm-amdgpu1
- libdrm-dev
- libelf-dev
- libfreetype-dev
- libgfortran5
- libgomp1
- libjpeg-dev
- libjpeg-turbo-official
- liblapack-dev
- liblapack3
- libmetis5
- libncurses-dev
- libnuma-dev
- libopenblas-dev
- libpth-dev
- libquadmath0
- libssh-dev
- libstdc++-12-dev
- libsuitesparseconfig5
- libswscale-dev
- libtinfo-dev
- libunwind-dev
- libwebp-dev
- llvm-dev
- ncurses-base
- ninja-build
- numactl
- openjdk-8-jdk
- python-is-python3
- python3-dev
- python3-pip
- python3-venv
- wget
- ncurses-base
- libncurses-dev
- numactl
- libnuma-dev
- libssh-dev
- libunwind-dev
- llvm-dev
- libpth-dev
- qemu-kvm
- re2c
- subversion
- fakeroot
- autoconf
- libgomp1
- libtinfo-dev
- libcholmod3
- libsuitesparseconfig5
- libstdc++-12-dev
- python-is-python3
- gfortran
- libgfortran5
- liblapack3
- libblas3
- libquadmath0
- libmetis5
- libamd2
- libcamd2
- libcolamd2
- libccolamd2
- libdrm-amdgpu1
- ccache
- wget
- zip
- libjpeg-turbo-official
- libjpeg-dev
- libwebp-dev
- libfreetype-dev
- gnutls-bin
- ffmpeg
- libopenblas-dev
- liblapack-dev
- libswscale-dev
- libavformat-dev
- name: pipModules
type: object
default:
- cmake
- astunparse
- "expecttest>=0.2.1"
- "expecttest>=0.3.0"
- hypothesis
- numpy
- psutil
@@ -76,8 +76,8 @@ parameters:
- requests
- setuptools==75.8.0
- types-dataclasses
- "typing-extensions>=4.8.0"
- "sympy>=1.13.0"
- "typing-extensions>=4.10.0"
- "sympy>=1.13.3"
- filelock
- networkx
- jinja2
@@ -97,36 +97,39 @@ parameters:
- name: rocmDependencies
type: object
default:
- rocminfo
- MIOpen
- clr
- hipBLAS
- hipBLASLt
- hipFFT
- hipRAND
- hipSOLVER
- hipSPARSE
- ROCR-Runtime
- hipSPARSELt
- llvm-project
- MIOpen
- rccl
- rocBLAS
- rocFFT
- rocm-core
- rocminfo
- rocm_smi_lib
- rocPRIM
- rocprofiler-register
- rocRAND
- ROCR-Runtime
- rocSOLVER
- rocSPARSE
- roctracer
- hipBLASLt
- rocprofiler-register
- rocm-core
- rocPRIM
# below are additional dependencies not called out by build script, but throw errors during cmake
- composable_kernel
- hipBLAS-common
- hipCUB
- rocThrust
- hipBLAS-common
- composable_kernel
- name: rocmTestDependencies
type: object
default:
# rocroller.so is needed and is not included in the wheel
- hipBLASLt
- rocminfo
# Reference on what tests to run for torchvision found in private repo:
# https://github.com/ROCm/rocAutomation/blob/jenkins-pipelines/pytorch/pytorch_ci/test_pytorch_test1.sh#L54
@@ -240,12 +243,6 @@ jobs:
git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules
sudo ln -s $(Build.SourcesDirectory)/builder /builder
workingDirectory: $(Build.SourcesDirectory)
- task: Bash@3
displayName: Temporarily Patch CK Submodule
inputs:
targetType: inline
script: git pull origin develop
workingDirectory: $(Build.SourcesDirectory)/pytorch/third_party/composable_kernel
- task: Bash@3
displayName: Install patchelf
inputs:
@@ -267,6 +264,11 @@ jobs:
script: |
sudo bash pytorch/.ci/docker/common/install_rocm_magma.sh $(MAGMA_ROCM)
workingDirectory: $(Build.SourcesDirectory)
- task: Bash@3
displayName: Install targeted typing_extensions for build
inputs:
targetType: inline
script: pip install --target=$(Build.SourcesDirectory)/pytorch/torch/.. typing_extensions
- task: Bash@3
displayName: Run ROCm Build Script
inputs:
@@ -281,7 +283,6 @@ jobs:
PYTORCH_ROOT=$(PYTORCH_ROOT)
CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
DESIRED_DEVTOOLSET=$(DESIRED_DEVTOOLSET)
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
PYTORCH_BUILD_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)
PYTORCH_BUILD_NUMBER=$(date -u +%Y%m%d)
SKIP_ALL_TESTS=1
@@ -322,8 +323,6 @@ jobs:
inputs:
targetType: inline
script: >-
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
TORCHVISION_PACKAGE_NAME=torchvision.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
PYTORCH_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
BUILD_VERSION=$(cat $(Build.SourcesDirectory)/vision/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
python3 setup.py bdist_wheel
@@ -400,7 +399,7 @@ jobs:
- task: DownloadPipelineArtifact@2
displayName: 'Download Pipeline Wheel Files'
inputs:
itemPattern: '**/*$(JOB_GPU_TARGET)*.whl'
itemPattern: '**/*.whl'
targetPath: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
parameters: