mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-10 23:28:03 -05:00
Compare commits
40 Commits
20250912-1
...
docs/7.0-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02180e7bf7 | ||
|
|
b66ce0b0a6 | ||
|
|
ac00f08a03 | ||
|
|
4667fbfc83 | ||
|
|
7cb2143e15 | ||
|
|
dc56ba46b6 | ||
|
|
56d74cb542 | ||
|
|
f92dde618b | ||
|
|
9e7a2e711c | ||
|
|
bca2c255bb | ||
|
|
fbfc0bf70d | ||
|
|
8f24297ac5 | ||
|
|
481157e567 | ||
|
|
cc4262185a | ||
|
|
cc7a042501 | ||
|
|
d26a9c8162 | ||
|
|
edf400789a | ||
|
|
7d5abf2dae | ||
|
|
c895490f80 | ||
|
|
95285875d0 | ||
|
|
596a120566 | ||
|
|
684fd6102f | ||
|
|
fb8d86df6a | ||
|
|
da26a1c9cb | ||
|
|
df84dadd43 | ||
|
|
bc6d48ef94 | ||
|
|
4728b201a6 | ||
|
|
a7abd5f67c | ||
|
|
f7e670d36c | ||
|
|
6cebe026b1 | ||
|
|
2134313c4a | ||
|
|
08e5cb4321 | ||
|
|
22732e81a2 | ||
|
|
975c4036a4 | ||
|
|
6df8002b08 | ||
|
|
0f261049bb | ||
|
|
8c7378ba71 | ||
|
|
dcc949f441 | ||
|
|
a4b1b2cc67 | ||
|
|
4f592f8949 |
42
.azuredevops/ci-builds/aomp-mainline.yml
Normal file
42
.azuredevops/ci-builds/aomp-mainline.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: aomp_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/aomp
|
||||
ref: amd-mainline
|
||||
- repository: aomp-extras_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/aomp-extras
|
||||
ref: amd-mainline
|
||||
- repository: flang_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/flang
|
||||
ref: amd-mainline
|
||||
- repository: llvm-project_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/llvm-project
|
||||
ref: amd-mainline
|
||||
pipelines:
|
||||
- pipeline: rocr-runtime_pipeline
|
||||
source: \ROCR-Runtime
|
||||
trigger:
|
||||
branches:
|
||||
include:
|
||||
- amd-mainline
|
||||
# this job will only be triggered after successful build sequence of llvm-project and ROCR-Runtime
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/aomp.yml
|
||||
parameters:
|
||||
checkoutRepo: aomp_repo
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: AMDMIGraphX
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
# - name: sparseCheckoutDir
|
||||
# type: string
|
||||
# default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -112,11 +93,7 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }}
|
||||
- job: AMDMIGraphX_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -144,8 +121,6 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -171,12 +146,12 @@ jobs:
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
- job: AMDMIGraphX_test_${{ job.target }}
|
||||
dependsOn: AMDMIGraphX_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
@@ -208,8 +183,6 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: CMake@1
|
||||
displayName: MIGraphXTest CMake Flags
|
||||
inputs:
|
||||
@@ -226,7 +199,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
componentName: AMDMIGraphX
|
||||
testExecutable: make
|
||||
testParameters: -j$(nproc) check
|
||||
testPublishResults: false
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hip_clr_combined
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -54,24 +35,93 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
|
||||
# hip and clr are tightly-coupled
|
||||
# run this same template for both repos
|
||||
# any changes for clr should just trigger HIP pipeline
|
||||
# similarly for hipother repo, for Nvidia backend
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, platform: amd }
|
||||
- { os: ubuntu2204, packageManager: apt, platform: nvidia }
|
||||
- { os: almalinux8, packageManager: dnf, platform: amd }
|
||||
- { os: almalinux8, packageManager: dnf, platform: nvidia }
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
# HIP with AMD backend
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_${{ job.os }}_${{ job.platform }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
- job: hip_clr_combined_${{ job.os }}_amd
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=amd
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: amd
|
||||
|
||||
# HIP with Nvidia backend
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hip_clr_combined_${{ job.os }}_nvidia
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
@@ -90,45 +140,49 @@ jobs:
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# full checkout of rocm-systems superrepo, we need clr, hip, and hipother
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
${{ if eq(job.platform, 'amd') }}:
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
${{ elseif eq(job.platform, 'nvidia') }}:
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
|
||||
displayName: 'Artifact listing'
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/s/projects/clr/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/clr
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=nvidia
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DHIP_COMMON_DIR=$(Agent.BuildDirectory)/s/projects/hip
|
||||
-DHIPNV_DIR=$(Agent.BuildDirectory)/s/projects/hipother/hipnv
|
||||
-DHIP_PLATFORM=${{ job.platform }}
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=ON
|
||||
-DCLR_BUILD_OCL=OFF
|
||||
-DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
artifactName: ${{ job.platform }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: ${{ job.platform }}
|
||||
artifactName: nvidia
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: nvidia
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: MIOpen
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -93,37 +74,16 @@ parameters:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- MIVisionX:
|
||||
name: MIVisionX
|
||||
checkoutRepo: mivisionx_repo
|
||||
sparseCheckoutDir: ''
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- MIOpen_build
|
||||
- AMDMIGraphX:
|
||||
name: AMDMIGraphX
|
||||
checkoutRepo: amdmigraphx_repo
|
||||
sparseCheckoutDir: ''
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- MIOpen_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }}
|
||||
- job: MIOpen_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -131,12 +91,10 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/miopen-get-ck-build.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
@@ -146,14 +104,11 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Build and install other dependencies
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
script: |
|
||||
sed -i '/composable_kernel/d' requirements.txt
|
||||
mkdir -p $(Agent.BuildDirectory)/miopen-deps
|
||||
@@ -175,10 +130,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -190,9 +143,9 @@ jobs:
|
||||
- miopen-deps
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
- job: MIOpen_test_${{ job.target }}
|
||||
timeoutInMinutes: 180
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
dependsOn: MIOpen_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -212,12 +165,10 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/miopen-get-ck-build.yml
|
||||
parameters:
|
||||
@@ -227,14 +178,11 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Build and install other dependencies
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
script: |
|
||||
sed -i '/composable_kernel/d' requirements.txt
|
||||
mkdir -p $(Agent.BuildDirectory)/miopen-deps
|
||||
@@ -245,7 +193,7 @@ jobs:
|
||||
displayName: 'MIOpen Test CMake Flags'
|
||||
inputs:
|
||||
cmakeArgs: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/s/bin;$(Agent.BuildDirectory)/miopen-deps
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Build.SourcesDirectory)/bin;$(Agent.BuildDirectory)/miopen-deps
|
||||
-DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
@@ -255,19 +203,19 @@ jobs:
|
||||
-DBUILD_DEV=OFF
|
||||
-DMIOPEN_USE_MLIR=ON
|
||||
-DMIOPEN_GPU_SYNC=OFF
|
||||
$(Agent.BuildDirectory)/s
|
||||
..
|
||||
- task: Bash@3
|
||||
displayName: 'MIOpen Test Build'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: build
|
||||
script: |
|
||||
cmake --build . --target tests -- -j$(nproc)
|
||||
workingDirectory: $(Build.SourcesDirectory)/build
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32|GPU_Conv2dTuningAsmBwdWrw_FP32"'
|
||||
componentName: MIOpen
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32"'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
@@ -276,15 +224,3 @@ jobs:
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraCopyDirectories:
|
||||
- miopen-deps
|
||||
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ component.checkoutRepo }}
|
||||
# # sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: MIVisionX
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
# - name: sparseCheckoutDir
|
||||
# type: string
|
||||
# default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -79,7 +60,6 @@ parameters:
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- aomp
|
||||
- clr
|
||||
- half
|
||||
- hipBLAS-common
|
||||
@@ -108,11 +88,7 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }}
|
||||
- job: MIVisionX_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -134,8 +110,6 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -157,12 +131,12 @@ jobs:
|
||||
# gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
- job: MIVisionX_test_${{ job.target }}
|
||||
dependsOn: MIVisionX_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
@@ -187,8 +161,6 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Build MIVisionX tests
|
||||
inputs:
|
||||
@@ -202,7 +174,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
componentName: MIVisionX
|
||||
testDir: 'mivisionx-tests'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
|
||||
@@ -28,8 +28,8 @@ parameters:
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
- llvm-project
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
|
||||
- name: jobMatrix
|
||||
@@ -111,6 +111,14 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- task: Bash@3
|
||||
displayName: Install libhwloc5
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
wget http://ftp.us.debian.org/debian/pool/main/h/hwloc/libhwloc5_1.11.12-3_amd64.deb
|
||||
wget http://ftp.us.debian.org/debian/pool/main/h/hwloc/libhwloc-dev_1.11.12-3_amd64.deb
|
||||
sudo apt install -y --allow-downgrades ./libhwloc5_1.11.12-3_amd64.deb ./libhwloc-dev_1.11.12-3_amd64.deb
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
@@ -153,10 +161,6 @@ jobs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
|
||||
script: |
|
||||
echo $(Build.SourcesDirectory)/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
|
||||
@@ -86,7 +86,8 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: HIP_INC_DIR
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -33,9 +33,8 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- libboost-filesystem-dev
|
||||
- libboost-program-options-dev
|
||||
- libmsgpack-dev
|
||||
- libboost-program-options-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -171,7 +170,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*${{ job.os }}*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
|
||||
@@ -107,7 +107,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
# if this artifact name is changed, please also update $ARTIFACT_URL inside miopen-get-ck-build.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -39,6 +39,4 @@ jobs:
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
inputs:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
@@ -51,15 +51,15 @@ parameters:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipBLASLt:
|
||||
name: hipBLASLt
|
||||
sparseCheckoutDir: projects/hipblaslt
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- hipBLAS_common_build
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - hipBLASLt:
|
||||
# name: hipBLASLt
|
||||
# sparseCheckoutDir: projects/hipblaslt
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - hipBLAS_common_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -122,14 +122,14 @@ jobs:
|
||||
# extraEnvVars:
|
||||
# - ROCM_PATH:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipBLAS
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -88,30 +69,10 @@ parameters:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
# MIOpen depends on both rocRAND and hipBLAS
|
||||
# for a unified build, hipBLAS will be the one to call MIOpen
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- MIOpen:
|
||||
name: MIOpen
|
||||
sparseCheckoutDir: projects/miopen
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- hipBLAS_build
|
||||
unifiedBuild:
|
||||
downstreamAggregateNames: hipBLAS+rocRAND
|
||||
buildDependsOn:
|
||||
- hipBLAS_build
|
||||
- rocRAND_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }}
|
||||
- job: hipBLAS_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -127,7 +88,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
@@ -135,8 +95,6 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -151,12 +109,9 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -166,67 +121,46 @@ jobs:
|
||||
installAOCL: true
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testExecutable: $(Agent.BuildDirectory)/rocm/bin/hipblas-test
|
||||
testParameters: '--yaml hipblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
${{ if parameters.unifiedBuild }}:
|
||||
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
|
||||
${{ else }}:
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipBLAS_test_${{ job.target }}
|
||||
dependsOn: hipBLAS_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipBLAS
|
||||
testExecutable: $(Agent.BuildDirectory)/rocm/bin/hipblas-test
|
||||
testParameters: '--yaml hipblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -35,13 +35,9 @@ parameters:
|
||||
- ccache
|
||||
- gfortran
|
||||
- git
|
||||
- libboost-filesystem-dev
|
||||
- libboost-program-options-dev
|
||||
- libdrm-dev
|
||||
- liblapack-dev
|
||||
- libmsgpack-dev
|
||||
- libnuma-dev
|
||||
- libopenblas-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
@@ -50,12 +46,6 @@ parameters:
|
||||
default:
|
||||
- joblib
|
||||
- "packaging>=22.0"
|
||||
- pyyaml
|
||||
- msgpack
|
||||
- simplejson
|
||||
- ujson
|
||||
- orjson
|
||||
- yappi
|
||||
- --upgrade
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -87,28 +77,28 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { pool: rocm-ci_ultra_build_pool, os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
#- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { pool: rocm-ci_ultra_build_pool, os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
#- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocBLAS:
|
||||
name: rocBLAS
|
||||
sparseCheckoutDir: projects/rocblas
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- hipBLASLt_build
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - rocBLAS:
|
||||
# name: rocBLAS
|
||||
# sparseCheckoutDir: projects/rocblas
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - hipBLASLt_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -131,7 +121,7 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: DAY_STRING
|
||||
value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
|
||||
pool: ${{ job.pool }}
|
||||
pool: ${{ variables.ULTRA_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
@@ -150,10 +140,6 @@ jobs:
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -170,16 +156,19 @@ jobs:
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
# hipBLASLt has a script for gtest and lapack
|
||||
# https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
|
||||
# $(Agent.BuildDirectory)/deps is a temporary folder for the build process
|
||||
# $(Agent.BuildDirectory)/s/deps is part of the hipBLASLt repo
|
||||
- task: Bash@3
|
||||
displayName: Build and install LAPACK
|
||||
displayName: Build and install external dependencies
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
mkdir -p $(Agent.BuildDirectory)/temp-deps
|
||||
cd $(Agent.BuildDirectory)/temp-deps
|
||||
# position-independent LAPACK is required for almalinux8 builds
|
||||
cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/sparse/projects/hipblaslt/deps
|
||||
make -j
|
||||
mkdir -p $(Agent.BuildDirectory)/deps
|
||||
cd $(Agent.BuildDirectory)/deps
|
||||
cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
|
||||
make
|
||||
sudo make install
|
||||
- script: |
|
||||
mkdir -p $(CCACHE_DIR)
|
||||
@@ -197,21 +186,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt/build
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DHIPBLASLT_ENABLE_ROCROLLER=ON
|
||||
-DHIPBLASLT_ENABLE_FETCH=ON
|
||||
-DHIPBLASLT_ENABLE_BLIS=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
@@ -261,7 +244,6 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
@@ -298,14 +280,14 @@ jobs:
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -80,11 +80,11 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }}
|
||||
- ${{ build }}_${{ job.target }} # todo: add OS
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -141,12 +141,12 @@ jobs:
|
||||
# gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
@@ -156,7 +156,6 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -72,15 +72,15 @@ parameters:
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocFFT:
|
||||
name: rocFFT
|
||||
sparseCheckoutDir: projects/rocfft
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- hipRAND_build
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - rocFFT:
|
||||
# name: rocFFT
|
||||
# sparseCheckoutDir: projects/rocfft
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - hipRAND_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -206,14 +206,14 @@ jobs:
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipSOLVER
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -85,15 +66,12 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }}
|
||||
- job: hipSOLVER_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -104,21 +82,18 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# build external gtest and lapack
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: external
|
||||
cmakeBuildDir: '$(Agent.BuildDirectory)/s/deps/build'
|
||||
cmakeSourceDir: '$(Agent.BuildDirectory)/s/deps'
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/deps/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/deps'
|
||||
installDir: '$(Pipeline.Workspace)/deps-install'
|
||||
extraBuildFlags: >-
|
||||
-DBUILD_BOOST=OFF
|
||||
@@ -137,10 +112,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -150,49 +123,44 @@ jobs:
|
||||
# extraCopyDirectories:
|
||||
# - deps-install
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipsolver-test'
|
||||
testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipSOLVER_test_${{ job.target }}
|
||||
dependsOn: hipSOLVER_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipSOLVER
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipsolver-test'
|
||||
testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipSPARSE
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -33,11 +14,13 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- gfortran
|
||||
- git
|
||||
- libboost-program-options-dev
|
||||
- libfftw3-dev
|
||||
- ninja-build
|
||||
- libboost-program-options-dev
|
||||
- googletest
|
||||
- libfftw3-dev
|
||||
- git
|
||||
- gfortran
|
||||
- libgtest-dev
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -66,31 +49,19 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipSPARSELt:
|
||||
name: hipSPARSELt
|
||||
sparseCheckoutDir: projects/hipsparselt
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- hipSPARSE_build
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
- job: hipSPARSE_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -102,57 +73,42 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/share/rocm/cmake/
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
artifactName: hipSPARSE
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
artifactName: hipSPARSE
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
publish: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Agent.BuildDirectory)/s/build/clients
|
||||
sourceDir: $(Build.SourcesDirectory)/build/clients
|
||||
contentsString: matrices/**
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
artifactName: testMatrices
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
@@ -160,65 +116,44 @@ jobs:
|
||||
# environment: test
|
||||
# gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipsparse-test'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipSPARSE_test_${{ job.target }}
|
||||
dependsOn: hipSPARSE_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipSPARSE
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipsparse-test'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipSPARSELt
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -75,17 +56,15 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
- job: hipSPARSELt_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -107,23 +86,17 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# ignore sparse checkout for monorepo case, we want access to hipblaslt directory
|
||||
# sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# Build and install gtest and lapack
|
||||
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
|
||||
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
|
||||
@@ -131,10 +104,7 @@ jobs:
|
||||
displayName: Create temp folder for external dependencies
|
||||
# hipSPARSELt already has a CMake script for external deps, so we can just run that
|
||||
# https://github.com/ROCm/hipSPARSELt/blob/develop/deps/CMakeLists.txt
|
||||
- ${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
script: cmake $(Pipeline.Workspace)/s/projects/hipsparselt/deps
|
||||
${{ else }}:
|
||||
script: cmake $(Pipeline.Workspace)/s/deps
|
||||
- script: cmake $(Pipeline.Workspace)/s/deps
|
||||
displayName: Configure hipSPARSELt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
- script: make
|
||||
@@ -145,7 +115,6 @@ jobs:
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
@@ -158,87 +127,67 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
-GNinja
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
cmakeSourceDir: $(Build.SourcesDirectory)/projects/hipsparselt
|
||||
cmakeBuildDir: $(Build.SourcesDirectory)/projects/hipsparselt
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraCopyDirectories:
|
||||
- deps
|
||||
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
|
||||
- CMAKE_CXX_COMPILER:::/home/user/workspace/rocm/llvm/bin/hipcc
|
||||
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
|
||||
installLatestCMake: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraCopyDirectories:
|
||||
- deps
|
||||
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
|
||||
- CMAKE_CXX_COMPILER:::/home/user/workspace/rocm/llvm/bin/hipcc
|
||||
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
|
||||
installLatestCMake: true
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipsparselt-test'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipSPARSELt_test_${{ job.target }}
|
||||
dependsOn: hipSPARSELt_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipSPARSELt
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipsparselt-test'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -30,7 +30,7 @@ parameters:
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
# - { os: ubuntu2404, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
@@ -67,6 +67,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
skipLlvmSymlink: true
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
|
||||
@@ -1,236 +0,0 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: origami
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
- wget
|
||||
- python3
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- nanobind>=2.0.0
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipBLASLt:
|
||||
name: hipBLASLt
|
||||
sparseCheckoutDir: projects/hipblaslt
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- origami_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: origami_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DORIGAMI_BUILD_SHARED_LIBS=ON
|
||||
-DORIGAMI_ENABLE_PYTHON=ON
|
||||
-DORIGAMI_BUILD_TESTING=ON
|
||||
-GNinja
|
||||
- ${{ if ne(job.os, 'almalinux8') }}:
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'Publish Build Directory Artifact'
|
||||
inputs:
|
||||
targetPath: '$(Agent.BuildDirectory)/s/build'
|
||||
artifact: '${{ parameters.componentName }}_${{ job.os }}_build_dir'
|
||||
publishLocation: 'pipeline'
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'Publish Python Source Artifact'
|
||||
inputs:
|
||||
targetPath: '$(Agent.BuildDirectory)/s/python'
|
||||
artifact: '${{ parameters.componentName }}_${{ job.os }}_python_src'
|
||||
publishLocation: 'pipeline'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: origami_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: origami_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Build Directory Artifact'
|
||||
inputs:
|
||||
artifact: '${{ parameters.componentName }}_${{ job.os }}_build_dir'
|
||||
path: '$(Agent.BuildDirectory)/s/build'
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Python Source Artifact'
|
||||
inputs:
|
||||
artifact: '${{ parameters.componentName }}_${{ job.os }}_python_src'
|
||||
path: '$(Agent.BuildDirectory)/s/python'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- script: |
|
||||
export PYTHONPATH=$(Agent.BuildDirectory)/s/build/python:$PYTHONPATH
|
||||
|
||||
echo "--- Running origami_test.py ---"
|
||||
python3 $(Agent.BuildDirectory)/s/python/origami_test.py
|
||||
|
||||
echo "--- Running origami_grid_test.py ---"
|
||||
python3 $(Agent.BuildDirectory)/s/python/origami_grid_test.py
|
||||
displayName: 'Run Python Binding Tests'
|
||||
condition: succeeded()
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -86,7 +86,8 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -73,7 +73,8 @@ jobs:
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -33,15 +33,17 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- gfortran
|
||||
- libdrm-dev
|
||||
- libmsgpack-dev
|
||||
- libopenblas-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- git
|
||||
- libmsgpack-dev
|
||||
- gfortran
|
||||
- libopenblas-dev
|
||||
- googletest
|
||||
- libgtest-dev
|
||||
- wget
|
||||
- python3-pip
|
||||
- libdrm-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -50,17 +52,18 @@ parameters:
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- aomp
|
||||
- clr
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocm_smi_lib
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- rocm_smi_lib
|
||||
- rocm-core
|
||||
- aomp
|
||||
- aomp-extras
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- roctracer
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
@@ -83,45 +86,32 @@ parameters:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
#- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocSPARSE:
|
||||
name: rocSPARSE
|
||||
sparseCheckoutDir: projects/rocsparse
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocBLAS_build
|
||||
# rocSOLVER depends on both rocBLAS and rocPRIM
|
||||
# for a unified build, rocBLAS will be the one to call rocSOLVER
|
||||
# - rocSOLVER:
|
||||
# name: rocSOLVER
|
||||
# sparseCheckoutDir: projects/rocsolver
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# unifiedBuild:
|
||||
# downstreamAggregateNames: rocBLAS+rocPRIM
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# - rocPRIM_build
|
||||
# temporary rocblas->hipblas downstream path while the SOLVERs are disabled
|
||||
- hipBLAS:
|
||||
name: hipBLAS
|
||||
sparseCheckoutDir: projects/hipblas
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocBLAS_build
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# # rocSOLVER depends on both rocBLAS and rocPRIM
|
||||
# # for a unified build, rocBLAS will be the one to call rocSOLVER
|
||||
# - rocSOLVER:
|
||||
# name: rocSOLVER
|
||||
# sparseCheckoutDir: projects/rocsolver
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# unifiedBuild:
|
||||
# downstreamAggregateNames: rocBLAS+rocPRIM
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# - rocPRIM_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -161,12 +151,6 @@ jobs:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -180,12 +164,21 @@ jobs:
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DTensile_CODE_OBJECT_VERSION=default
|
||||
-DTensile_LOGIC=asm_full
|
||||
-DTensile_SEPARATE_ARCHITECTURES=ON
|
||||
-DTensile_LAZY_LIBRARY_LOADING=ON
|
||||
-DTensile_LIBRARY_FORMAT=msgpack
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_CLIENTS_BENCHMARKS=OFF
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
@@ -215,7 +208,6 @@ jobs:
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
@@ -230,7 +222,6 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
@@ -267,18 +258,18 @@ jobs:
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
${{ if parameters.unifiedBuild }}:
|
||||
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
|
||||
${{ else }}:
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
# ${{ if parameters.unifiedBuild }}:
|
||||
# buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
|
||||
# ${{ else }}:
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
|
||||
@@ -8,25 +8,6 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: rocPyDecodeRepo
|
||||
type: string
|
||||
default: rocpydecode_repo
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -75,23 +56,10 @@ parameters:
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocPyDecode:
|
||||
name: rocPyDecode
|
||||
sparseCheckoutDir: ''
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocDecode_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -115,15 +83,12 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -204,15 +169,3 @@ jobs:
|
||||
registerROCmPackages: true
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.rocPyDecodeRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -78,19 +78,19 @@ parameters:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipFFT:
|
||||
name: hipFFT
|
||||
sparseCheckoutDir: projects/hipfft
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocFFT_build
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - hipFFT:
|
||||
# name: hipFFT
|
||||
# sparseCheckoutDir: projects/hipfft
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - rocFFT_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
@@ -151,12 +151,12 @@ jobs:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
@@ -166,7 +166,6 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
@@ -196,14 +195,14 @@ jobs:
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -27,7 +27,6 @@ parameters:
|
||||
- numpy
|
||||
- tomli
|
||||
- scipy
|
||||
- pybind11
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
|
||||
@@ -91,12 +91,12 @@ parameters:
|
||||
- rocPRIM_build
|
||||
# rocSOLVER depends on both rocBLAS and rocPRIM
|
||||
# for a unified build, rocBLAS will be the one to call rocSOLVER
|
||||
# - rocSOLVER:
|
||||
# name: rocSOLVER
|
||||
# sparseCheckoutDir: projects/rocsolver
|
||||
# skipUnifiedBuild: 'true'
|
||||
# buildDependsOn:
|
||||
# - rocPRIM_build
|
||||
# - rocSOLVER:
|
||||
# name: rocSOLVER
|
||||
# sparseCheckoutDir: projects/rocsolver
|
||||
# skipUnifiedBuild: 'true'
|
||||
# buildDependsOn:
|
||||
# - rocPRIM_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -210,7 +210,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
|
||||
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
|
||||
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
|
||||
@@ -5,22 +5,6 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -52,7 +36,6 @@ parameters:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocDecode
|
||||
- rocJPEG
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocminfo
|
||||
@@ -63,19 +46,19 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocPyDecode_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -90,20 +73,16 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: 'Save Python Package Paths'
|
||||
inputs:
|
||||
@@ -210,13 +189,12 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
@@ -243,17 +221,25 @@ jobs:
|
||||
- task: CMake@1
|
||||
displayName: 'rocPyDecode Test CMake Flags'
|
||||
inputs:
|
||||
workingDirectory: $(Agent.BuildDirectory)/rocm/share/rocpydecode/tests
|
||||
cmakeArgs: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(PYTHON_USER_SITE)/pybind11;$(PYTHON_DIST_PACKAGES)/pybind11;$(PYBIND11_PATH)
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
.
|
||||
..
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocPyDecode
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/rocpydecode/tests
|
||||
testDir: $(Build.SourcesDirectory)/build
|
||||
# sudo required for pip install but screws up permissions for next pipeline run
|
||||
- task: Bash@3
|
||||
displayName: Clean up test environment
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
pip uninstall -y rocPyDecode
|
||||
pip uninstall -y hip-python
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -79,12 +79,6 @@ parameters:
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocRAND_build
|
||||
- MIOpen:
|
||||
name: MIOpen
|
||||
sparseCheckoutDir: projects/miopen
|
||||
skipUnifiedBuild: 'true'
|
||||
buildDependsOn:
|
||||
- rocRAND_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
|
||||
@@ -33,11 +33,13 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- gfortran
|
||||
- git
|
||||
- libfmt-dev
|
||||
- libsuitesparse-dev
|
||||
- ninja-build
|
||||
- libsuitesparse-dev
|
||||
- gfortran
|
||||
- libfmt-dev
|
||||
- git
|
||||
- googletest
|
||||
- libgtest-dev
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -73,38 +75,16 @@ parameters:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
#- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipBLAS:
|
||||
name: hipBLAS
|
||||
sparseCheckoutDir: projects/hipblas
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocSOLVER_build
|
||||
# hipSOLVER depends on both rocSOLVER and rocSPARSE
|
||||
# for a unified build, rocSOLVER will be the one to call hipSOLVER
|
||||
# - hipSOLVER:
|
||||
# name: hipSOLVER
|
||||
# sparseCheckoutDir: projects/hipsolver
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - rocSOLVER_build
|
||||
# unifiedBuild:
|
||||
# downstreamAggregateNames: rocSOLVER+rocSPARSE
|
||||
# buildDependsOn:
|
||||
# - rocSOLVER_build
|
||||
# - rocSPARSE_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -139,10 +119,6 @@ jobs:
|
||||
targetType: inline
|
||||
script: git clone --depth 1 --branch v3.9.1 https://github.com/Reference-LAPACK/lapack
|
||||
workingDirectory: '$(Build.SourcesDirectory)'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -158,7 +134,6 @@ jobs:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
-DCMAKE_Fortran_FLAGS=-fno-optimize-sibling-calls
|
||||
-DBUILD_TESTING=OFF
|
||||
-DCBLAS=ON
|
||||
@@ -171,7 +146,7 @@ jobs:
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
@@ -216,7 +191,6 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
@@ -250,19 +224,3 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
${{ if parameters.unifiedBuild }}:
|
||||
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
|
||||
${{ else }}:
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocSPARSE
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -32,25 +13,27 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- gfortran
|
||||
- git
|
||||
- libboost-program-options-dev
|
||||
- libdrm-dev
|
||||
- libfftw3-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- cmake
|
||||
- ninja-build
|
||||
- libboost-program-options-dev
|
||||
- googletest
|
||||
- libfftw3-dev
|
||||
- git
|
||||
- gfortran
|
||||
- libgtest-dev
|
||||
- libdrm-dev
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocBLAS
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- rocBLAS
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
@@ -69,39 +52,19 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipSPARSE:
|
||||
name: hipSPARSE
|
||||
sparseCheckoutDir: projects/hipsparse
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocSPARSE_build
|
||||
# hipSOLVER depends on both rocSOLVER and rocSPARSE
|
||||
# for a unified build, rocSOLVER will be the one to call hipSOLVER
|
||||
# - hipSOLVER:
|
||||
# name: hipSOLVER
|
||||
# sparseCheckoutDir: projects/hipsolver
|
||||
# skipUnifiedBuild: 'true'
|
||||
# buildDependsOn:
|
||||
# - rocSPARSE_build
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
- job: rocSPARSE_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -114,32 +77,22 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
@@ -150,94 +103,68 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
artifactName: rocSPARSE
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
artifactName: rocSPARSE
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
publish: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Agent.BuildDirectory)/s/build/clients
|
||||
sourceDir: $(Build.SourcesDirectory)/build/clients
|
||||
contentsString: matrices/**
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
artifactName: testMatrices
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocsparse-test'
|
||||
testParameters: '--gtest_filter="*quick*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocSPARSE_test_${{ job.target }}
|
||||
timeoutInMinutes: 90
|
||||
dependsOn: rocSPARSE_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocSPARSE
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocsparse-test'
|
||||
testParameters: '--gtest_filter="*quick*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocm-core
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -46,10 +27,6 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_core_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
@@ -73,10 +50,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -90,12 +65,9 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: rocm-examples
|
||||
testDir: $(Build.SourcesDirectory)/build
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "rocfft_callback"'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -1,181 +0,0 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocm_libraries
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- ccache
|
||||
- gfortran
|
||||
- git
|
||||
- libdrm-dev
|
||||
- liblapack-dev
|
||||
- libmsgpack-dev
|
||||
- libnuma-dev
|
||||
- libopenblas-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- joblib
|
||||
- "packaging>=22.0"
|
||||
- pytest
|
||||
- pytest-cmake
|
||||
- --upgrade
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- aomp
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocm-cmake
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- aomp
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { pool: rocm-ci_ultra_build_pool, os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 300
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: DAY_STRING
|
||||
value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
|
||||
pool: ${{ job.pool }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- script: |
|
||||
mkdir -p $(CCACHE_DIR)
|
||||
echo "##vso[task.prependpath]/usr/lib/ccache"
|
||||
displayName: Update path for ccache
|
||||
- task: Cache@2
|
||||
displayName: Ccache caching
|
||||
inputs:
|
||||
key: rocm-libraries | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING) | $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
path: $(CCACHE_DIR)
|
||||
restoreKeys: |
|
||||
rocm-libraries | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING)
|
||||
rocm-libraries | ${{ job.os }} | ${{ job.target }}
|
||||
rocm-libraries | ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Add paths for CMake and Python site-packages binaries
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
USER_BASE=$(python3 -m site --user-base)
|
||||
echo "##vso[task.prependpath]$USER_BASE/bin"
|
||||
echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake"
|
||||
displayName: Set cmake configure paths
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-D CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor;$(PytestCmakePath)
|
||||
-D CMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
|
||||
-D CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-D CMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-D CMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-G Ninja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
|
||||
installLatestCMake: true
|
||||
extraCopyDirectories:
|
||||
- deps
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocm-smi-lib
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -51,10 +32,6 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_smi_lib_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
@@ -78,10 +55,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -90,56 +65,51 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: rocm_smi_lib_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: rocm_smi_lib_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_smi_lib
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocminfo
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -59,11 +40,7 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
- job: rocminfo_build_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
@@ -85,18 +62,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
skipLlvmSymlink: true
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -105,71 +79,65 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocminfo_test_${{ job.target }}
|
||||
dependsOn: rocminfo_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocminfo'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_agent_enumerator
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocm_agent_enumerator'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocminfo_test_${{ job.target }}
|
||||
dependsOn: rocminfo_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocminfo
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocminfo'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_agent_enumerator
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocm_agent_enumerator'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocprofiler-compute
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -55,7 +36,6 @@ parameters:
|
||||
- pymongo
|
||||
- pyyaml
|
||||
- setuptools
|
||||
- sqlalchemy
|
||||
- tabulate
|
||||
- textual
|
||||
- textual_plotext
|
||||
@@ -85,23 +65,43 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
- gfx942-staging:
|
||||
name: gfx942_staging
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
dependencySource: staging
|
||||
- gfx942-mainline:
|
||||
name: gfx942_mainline
|
||||
target: gfx942
|
||||
dependencySource: mainline
|
||||
- gfx90a-staging:
|
||||
name: gfx90a_staging
|
||||
target: gfx90a
|
||||
dependencySource: staging
|
||||
- gfx90a-mainline:
|
||||
name: gfx90a_mainline
|
||||
target: gfx90a
|
||||
dependencySource: mainline
|
||||
testJobs:
|
||||
- gfx942:
|
||||
- gfx942-staging:
|
||||
name: gfx942_staging
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
dependencySource: staging
|
||||
- gfx942-mainline:
|
||||
name: gfx942_mainline
|
||||
target: gfx942
|
||||
dependencySource: mainline
|
||||
- gfx90a-staging:
|
||||
name: gfx90a_staging
|
||||
target: gfx90a
|
||||
dependencySource: staging
|
||||
- gfx90a-mainline:
|
||||
name: gfx90a_mainline
|
||||
target: gfx90a
|
||||
dependencySource: mainline
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_compute_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
- job: rocprofiler_compute_build_${{ job.name }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -118,19 +118,17 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
artifactName: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
artifactName: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -139,83 +137,80 @@ jobs:
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_compute_test_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: rocprofiler_compute_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PYTHON_VERSION
|
||||
value: 3.10
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Add en_US.UTF-8 locale
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale
|
||||
locale -a
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_HIP_ARCHITECTURES=${{ job.target }}
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DENABLE_TESTS=ON
|
||||
-DINSTALL_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
|
||||
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_compute_test_${{ job.name }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: rocprofiler_compute_build_${{ job.name }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PYTHON_VERSION
|
||||
value: 3.10
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
postTargetFilter: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
dependencySource: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Add en_US.UTF-8 locale
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale
|
||||
locale -a
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_HIP_ARCHITECTURES=${{ job.target }}
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DENABLE_TESTS=ON
|
||||
-DINSTALL_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-compute
|
||||
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
|
||||
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,29 +1,10 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocprofiler-register
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -46,10 +27,6 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_register_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
@@ -73,10 +50,9 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
componentName: rocprofiler-register
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -86,16 +62,12 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: $(Agent.BuildDirectory)/s/build
|
||||
componentName: rocprofiler-register
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
|
||||
@@ -37,7 +37,6 @@ parameters:
|
||||
- libpfm4-dev
|
||||
- libtool
|
||||
- libopenmpi-dev
|
||||
- libsqlite3-dev
|
||||
- m4
|
||||
- ninja-build
|
||||
- openmpi-bin
|
||||
|
||||
@@ -8,22 +8,6 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -86,10 +70,6 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -114,7 +94,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
@@ -129,8 +108,6 @@ jobs:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -138,7 +115,6 @@ jobs:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_MODULE_PATH=$(Build.SourcesDirectory)/cmake_modules;$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/lib64/cmake;$(Agent.BuildDirectory)/rocm/lib64/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
-DENABLE_LDCONFIG=OFF
|
||||
-DUSE_PROF_API=1
|
||||
@@ -146,13 +122,10 @@ jobs:
|
||||
multithreadFlag: -- -j32
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
@@ -166,68 +139,63 @@ jobs:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/lib/rocprofiler:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1/test:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV1
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1
|
||||
testExecutable: ./run.sh
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV2
|
||||
testDir: $(Agent.BuildDirectory)/rocm
|
||||
testExecutable: share/rocprofiler/tests/runUnitTests
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/lib/rocprofiler:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1/test:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV1
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1
|
||||
testExecutable: ./run.sh
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV2
|
||||
testDir: $(Agent.BuildDirectory)/rocm
|
||||
testExecutable: share/rocprofiler/tests/runUnitTests
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -8,22 +8,6 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -81,10 +65,6 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -107,7 +87,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -115,8 +94,6 @@ jobs:
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# the linker flags will not affect ubuntu2204 builds as the paths do not exist
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -132,13 +109,10 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
@@ -149,57 +123,53 @@ jobs:
|
||||
# gpuTarget: ${{ job.target }}
|
||||
# registerROCmPackages: true
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testExecutable: $(Agent.BuildDirectory)/rocm/share/roctracer/run_tests.sh
|
||||
testParameters: ''
|
||||
testDir: $(Agent.BuildDirectory)
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: roctracer
|
||||
testExecutable: $(Agent.BuildDirectory)/rocm/share/roctracer/run_tests.sh
|
||||
testParameters: ''
|
||||
testDir: $(Agent.BuildDirectory)
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
|
||||
@@ -40,6 +40,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: catch2Version
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: catch2_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone catch2 ${{ parameters.catch2Version }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/catchorg/Catch2.git -b ${{ parameters.catch2Version }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/Catch2/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/Catch2
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -1,67 +0,0 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: fmtlibVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
- libfmt-dev
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: fmtlib_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone fmtlib ${{ parameters.fmtlibVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/fmtlib/fmt.git -b ${{ parameters.fmtlibVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/fmt/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/fmt
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DFMT_SYSTEM_HEADERS=ON
|
||||
-DFMT_INSTALL=ON
|
||||
-DFMT_TEST=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -1,64 +0,0 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: libdivideVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: libdivide_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone libdivide ${{ parameters.libdivideVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/ridiculousfish/libdivide.git -b ${{ parameters.libdivideVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/libdivide/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/libdivide
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DLIBDIVIDE_BUILD_TESTS=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -1,71 +0,0 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: spdlogVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: spdlog_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- fmtlib
|
||||
- task: Bash@3
|
||||
displayName: Clone spdlog ${{ parameters.spdlogVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/gabime/spdlog.git -b ${{ parameters.spdlogVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/spdlog/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/spdlog
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DSPDLOG_USE_STD_FORMAT=OFF
|
||||
-DSPDLOG_FMT_EXTERNAL_HO=ON
|
||||
-DSPDLOG_INSTALL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -219,6 +219,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
setupHIPLibrarySymlinks: true
|
||||
- task: Bash@3
|
||||
@@ -397,7 +398,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
@@ -406,6 +406,8 @@ jobs:
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
dependencySource: staging
|
||||
skipLlvmSymlink: true
|
||||
# get sources to run test scripts
|
||||
- task: Bash@3
|
||||
displayName: git clone upstream pytorch
|
||||
|
||||
@@ -3,21 +3,21 @@ parameters:
|
||||
- name: jobList
|
||||
type: object
|
||||
default:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: ubuntu2404, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2404, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2404, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2404, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2404, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
- { os: ubuntu2204, target: gfx942, source: staging }
|
||||
- { os: ubuntu2204, target: gfx90a, source: staging }
|
||||
- { os: ubuntu2204, target: gfx1201, source: staging }
|
||||
- { os: ubuntu2204, target: gfx1100, source: staging }
|
||||
- { os: ubuntu2204, target: gfx1030, source: staging }
|
||||
- { os: ubuntu2404, target: gfx942, source: staging }
|
||||
- { os: ubuntu2404, target: gfx90a, source: staging }
|
||||
- { os: ubuntu2404, target: gfx1201, source: staging }
|
||||
- { os: ubuntu2404, target: gfx1100, source: staging }
|
||||
- { os: ubuntu2404, target: gfx1030, source: staging }
|
||||
- { os: almalinux8, target: gfx942, source: staging }
|
||||
- { os: almalinux8, target: gfx90a, source: staging }
|
||||
- { os: almalinux8, target: gfx1201, source: staging }
|
||||
- { os: almalinux8, target: gfx1100, source: staging }
|
||||
- { os: almalinux8, target: gfx1030, source: staging }
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -92,8 +92,7 @@ schedules:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobList }}:
|
||||
- job: nightly_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
- job: rocm_nightly_${{ job.os }}_${{ job.target }}_${{ job.source }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -116,9 +115,11 @@ jobs:
|
||||
displayName: System disk space before ROCm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencySource: ${{ job.source }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
skipLibraryLinking: true
|
||||
- script: df -h
|
||||
displayName: System disk space after ROCm
|
||||
- script: du -sh $(Agent.BuildDirectory)/rocm
|
||||
@@ -131,7 +132,7 @@ jobs:
|
||||
includeRootFolder: false
|
||||
archiveType: tar
|
||||
tarCompression: gz
|
||||
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_${{ job.os }}_${{ job.target }}.tar.gz
|
||||
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204_${{ job.target }}.tar.gz
|
||||
- script: du -sh $(Build.ArtifactStagingDirectory)
|
||||
displayName: Compressed ROCm size
|
||||
- task: PublishPipelineArtifact@1
|
||||
@@ -144,96 +145,5 @@ jobs:
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: echo "$(Build.DefinitionName)_$(Build.BuildNumber)_${{ job.os }}_${{ job.target }}.tar.gz" >> pipelineArtifacts.txt
|
||||
script: echo "$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204_${{ job.target }}.tar.gz" >> pipelineArtifacts.txt
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.packageManager, 'apt') }}:
|
||||
- task: Bash@3
|
||||
displayName: Create Dockerfile
|
||||
inputs:
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
targetType: inline
|
||||
script: |
|
||||
cat <<'EOF' > Dockerfile
|
||||
${{ iif(eq(job.os, 'ubuntu2204'), 'FROM ubuntu:22.04', '') }}
|
||||
${{ iif(eq(job.os, 'ubuntu2404'), 'FROM ubuntu:24.04', '') }}
|
||||
|
||||
WORKDIR /root
|
||||
RUN mkdir rocm
|
||||
|
||||
RUN apt update \
|
||||
&& apt upgrade -y \
|
||||
&& apt install -y cmake curl git gcc g++ gpg lsb-release lsof ninja-build pkg-config python3 python3-pip wget zip libdrm-dev libelf-dev libgtest-dev libhsakmt-dev libhwloc-dev libnuma-dev libstdc++-12-dev libtbb-dev jq \
|
||||
&& apt clean all
|
||||
|
||||
RUN PACKAGE_NAME=$(curl -s https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb") \
|
||||
&& wget -nv --retry-connrefused https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/$PACKAGE_NAME \
|
||||
&& mkdir hsa-amd-aqlprofile \
|
||||
&& dpkg-deb -R $PACKAGE_NAME hsa-amd-aqlprofile \
|
||||
&& cp -R hsa-amd-aqlprofile/opt/rocm-*/* rocm
|
||||
|
||||
RUN ARTIFACT_URL="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/$(Build.BuildId)/artifacts?artifactName=nightly${{ job.os }}${{ job.target }}&api-version=7.1" \
|
||||
&& DOWNLOAD_URL=$(curl -s $ARTIFACT_URL | jq ".resource.downloadUrl" | tr -d '"') \
|
||||
&& wget -nv --retry-connrefused $DOWNLOAD_URL -O nightly.zip \
|
||||
&& unzip nightly.zip \
|
||||
&& tar -xf nightly${{ job.os }}${{ job.target }}/rocm-nightly*${{ job.os }}*${{ job.target }}*.tar.gz -C rocm
|
||||
|
||||
RUN echo /root/rocm/lib | tee /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN echo /root/rocm/llvm/lib | tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN echo /root/rocm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN echo /root/rocm/llvm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN ldconfig -v
|
||||
ENV PATH="$PATH:/root/rocm/bin"
|
||||
ENTRYPOINT ["/bin/bash"]
|
||||
EOF
|
||||
cat Dockerfile
|
||||
- ${{ elseif eq(job.packageManager, 'dnf') }}:
|
||||
- task: Bash@3
|
||||
displayName: Create Dockerfile
|
||||
inputs:
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
targetType: inline
|
||||
script: |
|
||||
cat <<'EOF' > Dockerfile
|
||||
${{ iif(eq(job.os, 'almalinux8'), 'FROM almalinux:8', '') }}
|
||||
|
||||
WORKDIR /root
|
||||
RUN mkdir rocm
|
||||
|
||||
RUN dnf install -y cmake curl git gcc gcc-c++ gnupg2 redhat-lsb-core lsof pkgconf python3 python3-pip wget zip libdrm-devel elfutils-libelf-devel numactl-devel libstdc++-devel tbb-devel jq \
|
||||
&& dnf clean all
|
||||
|
||||
RUN PACKAGE_NAME=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1) \
|
||||
&& wget -nv --retry-connrefused https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$PACKAGE_NAME \
|
||||
&& mkdir hsa-amd-aqlprofile \
|
||||
&& dnf -y install rpm-build cpio \
|
||||
&& rpm2cpio $PACKAGE_NAME | (cd hsa-amd-aqlprofile && cpio -idmv) \
|
||||
&& cp -R hsa-amd-aqlprofile/opt/rocm-*/* rocm
|
||||
|
||||
RUN ARTIFACT_URL="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/$(Build.BuildId)/artifacts?artifactName=nightly${{ job.os }}${{ job.target }}&api-version=7.1" \
|
||||
&& DOWNLOAD_URL=$(curl -s $ARTIFACT_URL | jq ".resource.downloadUrl" | tr -d '"') \
|
||||
&& wget -nv --retry-connrefused $DOWNLOAD_URL -O nightly.zip \
|
||||
&& UNZIP_DISABLE_ZIPBOMB_DETECTION=TRUE unzip nightly.zip \
|
||||
&& tar -xf nightly${{ job.os }}${{ job.target }}/rocm-nightly*${{ job.os }}*${{ job.target }}*.tar.gz -C rocm
|
||||
|
||||
RUN echo /root/rocm/lib | tee /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN echo /root/rocm/llvm/lib | tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN echo /root/rocm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN echo /root/rocm/llvm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
RUN ldconfig -v
|
||||
ENV PATH="$PATH:/root/rocm/bin"
|
||||
ENTRYPOINT ["/bin/bash"]
|
||||
EOF
|
||||
cat Dockerfile
|
||||
- task: Docker@2
|
||||
displayName: Build and upload Docker image
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
containerRegistry: ContainerService3
|
||||
repository: 'nightly-${{ job.os }}-${{ job.target }}'
|
||||
Dockerfile: '$(Agent.BuildDirectory)/Dockerfile'
|
||||
buildContext: '$(Agent.BuildDirectory)'
|
||||
- task: Bash@3
|
||||
displayName: '!! Docker Run Command !!'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "docker run -it --network=host --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined rocmexternalcicd.azurecr.io/nightly-${{ job.os }}-${{ job.target }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: catch2Version
|
||||
type: string
|
||||
default: "v3.7.0"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/catch2.yml
|
||||
parameters:
|
||||
catch2Version: ${{ parameters.catch2Version }}
|
||||
@@ -1,23 +0,0 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: fmtlibVersion
|
||||
type: string
|
||||
default: "11.1.3"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/fmtlib.yml
|
||||
parameters:
|
||||
fmtlibVersion: ${{ parameters.fmtlibVersion }}
|
||||
@@ -1,23 +0,0 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: libdivideVersion
|
||||
type: string
|
||||
default: master
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/libdivide.yml
|
||||
parameters:
|
||||
libdivideVersion: ${{ parameters.libdivideVersion }}
|
||||
@@ -1,23 +0,0 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: spdlogVersion
|
||||
type: string
|
||||
default: "v1.15.1"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/spdlog.yml
|
||||
parameters:
|
||||
spdlogVersion: ${{ parameters.spdlogVersion }}
|
||||
@@ -12,9 +12,6 @@ parameters:
|
||||
- name: fileFilter
|
||||
type: string
|
||||
default: ''
|
||||
- name: extractAndDeleteFiles
|
||||
type: boolean
|
||||
default: true
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -24,12 +21,8 @@ parameters:
|
||||
steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ parameters.componentName }}
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
${{ if eq(parameters.componentName, 'clr') }}:
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*amd*' # filter out nvidia clr artifacts
|
||||
${{ else }}:
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
allowPartiallySucceededBuilds: true
|
||||
${{ if parameters.aggregatePipeline }}:
|
||||
@@ -44,17 +37,16 @@ steps:
|
||||
buildVersionToDownload: latest # aomp trigger lives in ROCm/ROCm, so cannot use ROCm/aomp branch names
|
||||
${{ else }}:
|
||||
buildVersionToDownload: latestFromBranch
|
||||
- ${{ if eq(parameters.extractAndDeleteFiles, true) }}:
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract ${{ parameters.componentName }}
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: '$(Agent.BuildDirectory)/rocm'
|
||||
cleanDestinationFolder: false
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Clean up Compressed ${{ parameters.componentName }}
|
||||
inputs:
|
||||
SourceFolder: '$(Pipeline.Workspace)/d'
|
||||
Contents: '**/*.tar.gz'
|
||||
RemoveDotFiles: true
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract ${{ parameters.componentName }}
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: '$(Agent.BuildDirectory)/rocm'
|
||||
cleanDestinationFolder: false
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Cleanup Compressed ${{ parameters.componentName }}
|
||||
inputs:
|
||||
SourceFolder: '$(Pipeline.Workspace)/d'
|
||||
Contents: '**/*.tar.gz'
|
||||
RemoveDotFiles: true
|
||||
|
||||
@@ -15,8 +15,8 @@ steps:
|
||||
URL_BEGIN="https://artprodcus3.artifacts.visualstudio.com/"
|
||||
URL_MIDDLE="/_apis/artifact/"
|
||||
URL_END="/content?format=file&subPath=%2F"
|
||||
ARTIFACT_NAME="$(Agent.JobName)_$(System.JobAttempt)"
|
||||
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/$(DOWNLOAD_PROJECT_ID)/buildId/$(Build.BuildId)/artifactName/${ARTIFACT_NAME}"
|
||||
FORMATTED_JOB_NAME=$(echo $(Agent.JobName) | sed 's/ /./g; s/[-_]//g')
|
||||
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/$(DOWNLOAD_PROJECT_ID)/buildId/$(Build.BuildId)/artifactName/${FORMATTED_JOB_NAME}"
|
||||
ENCODED_STRING=$(echo -n "${ARTIFACT_STRING}" | base64 -w 0)
|
||||
PADDING_COUNT=$(echo -n "${ENCODED_STRING}" | awk -F= '{print NF-1}')
|
||||
if [ "$PADDING_COUNT" -gt 0 ]; then
|
||||
|
||||
@@ -46,6 +46,5 @@ steps:
|
||||
displayName: '${{ parameters.artifactName }} Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
# if this artifact name is changed, please also update $ARTIFACT_URL inside miopen-get-ck-build.yml
|
||||
artifactName: $(Agent.JobName)_$(System.JobAttempt)
|
||||
artifactName: ${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt)
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)'
|
||||
|
||||
@@ -20,7 +20,7 @@ steps:
|
||||
retryCountOnTaskFailure: 3
|
||||
fetchFilter: blob:none
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }} shared
|
||||
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }}
|
||||
path: sparse
|
||||
- ${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
- task: Bash@3
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
parameters:
|
||||
- name: os
|
||||
type: string
|
||||
default: ubuntu2204
|
||||
- name: repositoryUrl
|
||||
type: string
|
||||
default: https://download.amd.com/developer/eula/aocl/aocl-4-2
|
||||
- name: packageName
|
||||
type: object
|
||||
default:
|
||||
ubuntu2204: aocl-linux-gcc-4.2.0_1_amd64.deb
|
||||
almalinux8: aocl-linux-gcc-4.2.0-1.x86_64.rpm
|
||||
type: string
|
||||
default: aocl-linux-gcc-4.2.0_1_amd64.deb
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
@@ -17,19 +12,16 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
script: wget -nv ${{ parameters.repositoryUrl }}/${{ parameters.packageName[parameters.os] }}
|
||||
script: wget -nv ${{ parameters.repositoryUrl }}/${{ parameters.packageName }}
|
||||
- task: Bash@3
|
||||
displayName: Install AOCL
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: sudo apt install -y ./${{ parameters.packageName[parameters.os] }}
|
||||
${{ elseif eq(parameters.os, 'almalinux8') }}:
|
||||
script: sudo dnf install -y ./${{ parameters.packageName[parameters.os] }}
|
||||
script: sudo apt install -y ./${{ parameters.packageName }}
|
||||
- task: Bash@3
|
||||
displayName: Clean up AOCL
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
script: rm -f ${{ parameters.packageName[parameters.os] }}
|
||||
script: rm -f ${{ parameters.packageName }}
|
||||
|
||||
@@ -10,7 +10,6 @@ steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos (apt)'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -21,8 +20,7 @@ steps:
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- task: Bash@3
|
||||
displayName: 'APT update and install packages'
|
||||
retryCountOnTaskFailure: 3
|
||||
displayName: 'sudo apt-get update'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -30,6 +28,15 @@ steps:
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update && \
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install && \
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get fix'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
|
||||
- ${{ if gt(length(parameters.aptPackages), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get install ...'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
|
||||
|
||||
@@ -5,28 +5,51 @@ parameters:
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Download and install aqlprofile
|
||||
retryCountOnTaskFailure: 3
|
||||
displayName: Get aqlprofile package name
|
||||
inputs:
|
||||
targetType: inline
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1)
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
- task: Bash@3
|
||||
displayName: 'Download aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$(packageName)
|
||||
- task: Bash@3
|
||||
displayName: 'Extract aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
dpkg-deb -R $(packageName) hsa-amd-aqlprofile
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
sudo dnf -y install rpm-build cpio
|
||||
rpm2cpio $(packageName) | (cd hsa-amd-aqlprofile && cpio -idmv)
|
||||
- task: Bash@3
|
||||
displayName: 'Copy aqlprofile files'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
script: |
|
||||
set -e
|
||||
if [ "${{ parameters.os }}" = "ubuntu2204" ]; then
|
||||
packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb") && \
|
||||
wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$packageName && \
|
||||
mkdir -p hsa-amd-aqlprofile && \
|
||||
dpkg-deb -R $packageName hsa-amd-aqlprofile
|
||||
elif [ "${{ parameters.os }}" = "almalinux8" ]; then
|
||||
sudo dnf -y install rpm-build cpio && \
|
||||
packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1) && \
|
||||
wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$packageName && \
|
||||
mkdir -p hsa-amd-aqlprofile && \
|
||||
rpm2cpio $packageName | (cd hsa-amd-aqlprofile && cpio -idmv)
|
||||
else
|
||||
echo "Unsupported OS: ${{ parameters.os }}"
|
||||
exit 1
|
||||
fi && \
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm && \
|
||||
cp -R hsa-amd-aqlprofile/opt/rocm-*/* $(Agent.BuildDirectory)/rocm && \
|
||||
rm -rf hsa-amd-aqlprofile $packageName
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm
|
||||
cp -R hsa-amd-aqlprofile/opt/rocm-*/* $(Agent.BuildDirectory)/rocm
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- task: Bash@3
|
||||
displayName: 'Clean up aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: rm -rf hsa-amd-aqlprofile $(packageName)
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
|
||||
@@ -52,15 +52,12 @@ parameters:
|
||||
libexpat-dev: expat-devel
|
||||
libffi-dev: libffi-devel
|
||||
libfftw3-dev: fftw-devel
|
||||
libfmt-dev: fmt-devel
|
||||
libgmp-dev: gmp-devel
|
||||
liblapack-dev: lapack-devel
|
||||
liblzma-dev: xz-devel
|
||||
libmpfr-dev: mpfr-devel
|
||||
libmsgpack-dev: msgpack-devel
|
||||
libncurses5-dev: ncurses-devel
|
||||
libnuma-dev: numactl-devel
|
||||
libopenblas-dev: openblas-devel
|
||||
libopenmpi-dev: openmpi-devel
|
||||
libpci-dev: libpciaccess-devel
|
||||
libssl-dev: openssl-devel
|
||||
@@ -89,7 +86,6 @@ steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos (dnf)'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -110,13 +106,12 @@ steps:
|
||||
sudo dnf makecache
|
||||
- task: Bash@3
|
||||
displayName: 'Install base dnf packages'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo dnf config-manager --set-enabled powertools
|
||||
# rpm fusion free repo for some dependencies
|
||||
sudo dnf config-manager --set-enabled powertools && \
|
||||
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm && \
|
||||
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
|
||||
sudo dnf -y install ${{ join(' ', parameters.basePackages) }}
|
||||
- task: Bash@3
|
||||
displayName: 'Check gcc environment'
|
||||
@@ -130,7 +125,6 @@ steps:
|
||||
g++ -print-file-name=libstdc++.so
|
||||
- task: Bash@3
|
||||
displayName: 'Set python 3.11 as default'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -145,20 +139,18 @@ steps:
|
||||
- ${{ if eq(pkg, 'ninja-build') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Install ninja 1.11.1'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo dnf -y install unzip && \
|
||||
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip && \
|
||||
unzip ninja-linux.zip && \
|
||||
sudo mv ninja /usr/local/bin/ninja && \
|
||||
sudo chmod +x /usr/local/bin/ninja && \
|
||||
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip
|
||||
sudo dnf -y install unzip
|
||||
unzip ninja-linux.zip
|
||||
sudo mv ninja /usr/local/bin/ninja
|
||||
sudo chmod +x /usr/local/bin/ninja
|
||||
echo "##vso[task.prependpath]/usr/local/bin"
|
||||
- ${{ if ne(parameters.aptToDnfMap[pkg], '') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'dnf install ${{ parameters.aptToDnfMap[pkg] }}'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
|
||||
@@ -27,7 +27,6 @@ steps:
|
||||
- ${{ if gt(length(parameters.pipModules), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'pip install ...'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 -m pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}
|
||||
|
||||
@@ -3,6 +3,13 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: dependencySource # optional, overrides checkoutRef
|
||||
type: string
|
||||
default: null
|
||||
values:
|
||||
- null # empty strings aren't allowed as values, use null instead
|
||||
- staging
|
||||
- mainline
|
||||
- name: dependencyList
|
||||
type: object
|
||||
default: []
|
||||
@@ -12,6 +19,16 @@ parameters:
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if you're calling this template file multiple files in same pipeline
|
||||
# only leave last call false to optimize sequence
|
||||
- name: skipLibraryLinking
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if llvm-project is not downloaded in a particular call
|
||||
# or if you just don't want the symlink
|
||||
- name: skipLlvmSymlink
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if dlopen calls for HIP libraries are causing failures
|
||||
# because they do not follow shared library symlink convention
|
||||
- name: setupHIPLibrarySymlinks
|
||||
@@ -31,240 +48,309 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
AMDMIGraphX:
|
||||
pipelineId: 113
|
||||
developBranch: develop
|
||||
pipelineId: $(AMDMIGRAPHX_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: master
|
||||
hasGpuTarget: true
|
||||
amdsmi:
|
||||
pipelineId: 99
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(AMDSMI_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
aomp-extras:
|
||||
pipelineId: 111
|
||||
developBranch: aomp-dev
|
||||
pipelineId: $(AOMP_EXTRAS_PIPELINE_ID)
|
||||
stagingBranch: aomp-dev
|
||||
mainlineBranch: aomp-dev
|
||||
hasGpuTarget: false
|
||||
aomp:
|
||||
pipelineId: 115
|
||||
developBranch: aomp-dev
|
||||
pipelineId: $(AOMP_PIPELINE_ID)
|
||||
stagingBranch: aomp-dev
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
clr:
|
||||
pipelineId: 335
|
||||
developBranch: develop
|
||||
pipelineId: $(CLR_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
composable_kernel:
|
||||
pipelineId: 86
|
||||
developBranch: develop
|
||||
pipelineId: $(COMPOSABLE_KERNEL_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
half:
|
||||
pipelineId: 101
|
||||
developBranch: rocm
|
||||
pipelineId: $(HALF_PIPELINE_ID)
|
||||
stagingBranch: rocm
|
||||
mainlineBranch: rocm
|
||||
hasGpuTarget: false
|
||||
HIP:
|
||||
pipelineId: 335
|
||||
developBranch: develop
|
||||
pipelineId: $(HIP_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
hip-tests:
|
||||
pipelineId: 233
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(HIP_TESTS_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
hipBLAS:
|
||||
pipelineId: 317
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPBLAS_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
hipBLASLt:
|
||||
pipelineId: 301
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPBLASLT_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
hipBLAS-common:
|
||||
pipelineId: 300
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPBLAS_COMMON_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: false
|
||||
hipCUB:
|
||||
pipelineId: 277
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPCUB_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
hipFFT:
|
||||
pipelineId: 283
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPFFT_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
hipfort:
|
||||
pipelineId: 102
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPFORT_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: false
|
||||
HIPIFY:
|
||||
pipelineId: 92
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(HIPIFY_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
hipRAND:
|
||||
pipelineId: 275
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPRAND_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
hipSOLVER:
|
||||
pipelineId: 84
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPSOLVER_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
hipSPARSE:
|
||||
pipelineId: 315
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPSPARSE_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
hipSPARSELt:
|
||||
pipelineId: 309
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPSPARSELT_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
hipTensor:
|
||||
pipelineId: 105
|
||||
developBranch: develop
|
||||
pipelineId: $(HIPTENSOR_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
llvm-project:
|
||||
pipelineId: 2
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(LLVM_PROJECT_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
MIOpen:
|
||||
pipelineId: 320
|
||||
developBranch: develop
|
||||
pipelineId: $(MIOpen_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: amd-master
|
||||
hasGpuTarget: true
|
||||
MIVisionX:
|
||||
pipelineId: 80
|
||||
developBranch: develop
|
||||
pipelineId: $(MIVISIONX_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: master
|
||||
hasGpuTarget: true
|
||||
omnitrace: # deprecated
|
||||
pipelineId: $(OMNITRACE_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: true
|
||||
rccl:
|
||||
pipelineId: 107
|
||||
developBranch: develop
|
||||
pipelineId: $(RCCL_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
rdc:
|
||||
pipelineId: 100
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(RDC_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocAL:
|
||||
pipelineId: 151
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCAL_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
rocALUTION:
|
||||
pipelineId: 89
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCALUTION_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
rocBLAS:
|
||||
pipelineId: 302
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCBLAS_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
ROCdbgapi:
|
||||
pipelineId: 135
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCDBGAPI_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocDecode:
|
||||
pipelineId: 79
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCDECODE_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: false
|
||||
rocFFT:
|
||||
pipelineId: 282
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCFFT_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
ROCgdb:
|
||||
pipelineId: 134
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCGDB_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline-rocgdb-15
|
||||
hasGpuTarget: false
|
||||
rocJPEG:
|
||||
pipelineId: 262
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCJPEG_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: false
|
||||
rocm-cmake:
|
||||
pipelineId: 6
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCM_CMAKE_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: false
|
||||
rocm-core:
|
||||
pipelineId: 349
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCM_CORE_PIPELINE_ID)
|
||||
stagingBranch: master
|
||||
mainlineBranch: amd-master
|
||||
hasGpuTarget: false
|
||||
rocm-examples:
|
||||
pipelineId: 216
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCM_EXAMPLES_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: true
|
||||
rocminfo:
|
||||
pipelineId: 356
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCMINFO_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocMLIR:
|
||||
pipelineId: 229
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCMLIR_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: false
|
||||
ROCmValidationSuite:
|
||||
pipelineId: 106
|
||||
developBranch: master
|
||||
pipelineId: $(ROCMVALIDATIONSUITE_PIPELINE_ID)
|
||||
stagingBranch: master
|
||||
mainlineBranch: master
|
||||
hasGpuTarget: true
|
||||
rocm_bandwidth_test:
|
||||
pipelineId: 88
|
||||
developBranch: master
|
||||
pipelineId: $(ROCM_BANDWIDTH_TEST_PIPELINE_ID)
|
||||
stagingBranch: master
|
||||
mainlineBranch: master
|
||||
hasGpuTarget: false
|
||||
rocm_smi_lib:
|
||||
pipelineId: 96
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCM_SMI_LIB_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocPRIM:
|
||||
pipelineId: 273
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCPRIM_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocprofiler:
|
||||
pipelineId: 329
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCPROFILER_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-master
|
||||
hasGpuTarget: true
|
||||
rocprofiler-compute:
|
||||
pipelineId: 257
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCPROFILER_COMPUTE_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: true
|
||||
rocprofiler-register:
|
||||
pipelineId: 327
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCPROFILER_REGISTER_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocprofiler-sdk:
|
||||
pipelineId: 246
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCPROFILER_SDK_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: true
|
||||
rocprofiler-systems:
|
||||
pipelineId: 255
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCPROFILER_SYSTEMS_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: true
|
||||
rocPyDecode:
|
||||
pipelineId: 239
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCPYDECODE_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
ROCR-Runtime:
|
||||
pipelineId: 10
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCR_RUNTIME_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocRAND:
|
||||
pipelineId: 274
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCRAND_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocr_debug_agent:
|
||||
pipelineId: 136
|
||||
developBranch: amd-staging
|
||||
pipelineId: $(ROCR_DEBUG_AGENT_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: false
|
||||
rocSOLVER:
|
||||
pipelineId: 81
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCSOLVER_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
rocSPARSE:
|
||||
pipelineId: 314
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCSPARSE_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
ROCT-Thunk-Interface: # deprecated
|
||||
pipelineId: $(ROCT_THUNK_INTERFACE_PIPELINE_ID)
|
||||
stagingBranch: master
|
||||
mainlineBranch: master
|
||||
hasGpuTarget: false
|
||||
rocThrust:
|
||||
pipelineId: 276
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCTHRUST_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
roctracer:
|
||||
pipelineId: 331
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCTRACER_PIPELINE_ID)
|
||||
stagingBranch: amd-staging
|
||||
mainlineBranch: amd-mainline
|
||||
hasGpuTarget: true
|
||||
rocWMMA:
|
||||
pipelineId: 109
|
||||
developBranch: develop
|
||||
pipelineId: $(ROCWMMA_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
rpp:
|
||||
pipelineId: 78
|
||||
developBranch: develop
|
||||
pipelineId: $(RPP_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
TransferBench:
|
||||
pipelineId: 265
|
||||
developBranch: develop
|
||||
pipelineId: $(TRANSFERBENCH_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
hasGpuTarget: true
|
||||
|
||||
steps:
|
||||
@@ -280,44 +366,72 @@ steps:
|
||||
parameters:
|
||||
componentName: ${{ split(dependency, ':')[0] }}
|
||||
pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].developBranch }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
extractAndDeleteFiles: false
|
||||
${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
|
||||
fileFilter: "${{ split(dependency, ':')[1] }}*_${{ parameters.os }}_${{ parameters.gpuTarget }}"
|
||||
# dependencySource = staging
|
||||
${{ if eq(parameters.dependencySource, 'staging')}}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
|
||||
# dependencySource = mainline
|
||||
${{ elseif eq(parameters.dependencySource, 'mainline')}}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].mainlineBranch }}
|
||||
# checkoutRef = staging
|
||||
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].stagingBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
|
||||
# checkoutRef = mainline
|
||||
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].mainlineBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].mainlineBranch }}
|
||||
# SourceBranchName = staging
|
||||
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].stagingBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
|
||||
# SourceBranchName = mainline
|
||||
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].mainlineBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].mainlineBranch }}
|
||||
# default = staging
|
||||
${{ else }}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
|
||||
# no colon (:) found in this item in the list
|
||||
- ${{ elseif containsValue(split(parameters.downstreamAggregateNames, '+'), dependency) }}:
|
||||
- template: local-artifact-download.yml
|
||||
parameters:
|
||||
buildType: current
|
||||
preTargetFilter: ${{ dependency }}
|
||||
os: ${{ parameters.os }}
|
||||
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
|
||||
gpuTarget: ${{ parameters.gpuTarget }}
|
||||
preTargetFilter: ${{ dependency }}
|
||||
os: ${{ parameters.os }}
|
||||
buildType: current
|
||||
- ${{ else }}:
|
||||
- template: artifact-download.yml
|
||||
parameters:
|
||||
componentName: ${{ dependency }}
|
||||
pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
|
||||
branchName: ${{ parameters.componentVarList[dependency].developBranch }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
extractAndDeleteFiles: false
|
||||
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
|
||||
fileFilter: ${{ parameters.os }}_${{ parameters.gpuTarget }}
|
||||
${{ else }}:
|
||||
fileFilter: ${{ parameters.os }}
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract ROCm artifacts
|
||||
inputs:
|
||||
archiveFilePatterns: $(Pipeline.Workspace)/d/**/*.tar.gz
|
||||
destinationFolder: $(Agent.BuildDirectory)/rocm
|
||||
cleanDestinationFolder: false
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Clean up ROCm artifacts
|
||||
inputs:
|
||||
SourceFolder: $(Pipeline.Workspace)/d
|
||||
Contents: '**/*.tar.gz'
|
||||
RemoveDotFiles: true
|
||||
- ${{ if containsValue(parameters.dependencyList, 'llvm-project') }}:
|
||||
# dependencySource = staging
|
||||
${{ if eq(parameters.dependencySource, 'staging')}}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
|
||||
# dependencySource = mainline
|
||||
${{ elseif eq(parameters.dependencySource, 'mainline')}}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].mainlineBranch }}
|
||||
# checkoutRef = staging
|
||||
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].stagingBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
|
||||
# checkoutRef = mainline
|
||||
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].mainlineBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].mainlineBranch }}
|
||||
# SourceBranchName = staging
|
||||
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].stagingBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
|
||||
# SourceBranchName = mainline
|
||||
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].mainlineBranch) }}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].mainlineBranch }}
|
||||
# default = staging
|
||||
${{ else }}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
|
||||
# Set link to redirect llvm folder
|
||||
- ${{ if eq(parameters.skipLlvmSymlink, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: Symlink from rocm/llvm to rocm/lib/llvm
|
||||
inputs:
|
||||
@@ -325,7 +439,6 @@ steps:
|
||||
script: |
|
||||
sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib
|
||||
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
echo "Created symlink from rocm/llvm to rocm/lib/llvm"
|
||||
- task: Bash@3
|
||||
displayName: Symlink executables from rocm/llvm/bin to rocm/bin
|
||||
inputs:
|
||||
@@ -333,14 +446,7 @@ steps:
|
||||
script: |
|
||||
for file in amdclang amdclang++ amdclang-cl amdclang-cpp amdflang amdlld aompcc mygpu mycpu offload-arch; do
|
||||
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
|
||||
echo "Created symlink from rocm/llvm/bin/$file to rocm/bin/$file"
|
||||
done
|
||||
- ${{ if containsValue(parameters.dependencyList, 'rocm-core') }}:
|
||||
- task: Bash@3
|
||||
displayName: Print rocm/.info/version
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: cat $(Agent.BuildDirectory)/rocm/.info/version
|
||||
# dlopen calls within a ctest or pytest sequence runs into issues when shared library symlink convention is not followed
|
||||
# the convention is as follows:
|
||||
# unversioned .so is a symlink to major version .so
|
||||
@@ -377,16 +483,17 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ls -la1R $(Agent.BuildDirectory)/rocm
|
||||
- task: Bash@3
|
||||
displayName: 'Link ROCm shared libraries'
|
||||
inputs:
|
||||
targetType: inline
|
||||
# OS ignores if the ROCm lib folder shows up more than once
|
||||
script: |
|
||||
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Link ROCm shared libraries'
|
||||
inputs:
|
||||
targetType: inline
|
||||
# OS ignores if the ROCm lib folder shows up more than once
|
||||
script: |
|
||||
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
|
||||
@@ -8,20 +8,15 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
boost: 250
|
||||
catch2: 343
|
||||
fmtlib: 341
|
||||
grpc: 72
|
||||
gtest: 73
|
||||
half560: 68
|
||||
lapack: 69
|
||||
libdivide: 342
|
||||
spdlog: 340
|
||||
|
||||
steps:
|
||||
- ${{ each dependency in parameters.dependencyList }}:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ dependency }}
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
project: ROCm-CI
|
||||
buildType: specific
|
||||
@@ -33,7 +28,7 @@ steps:
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: $(Agent.BuildDirectory)/vendor
|
||||
cleanDestinationFolder: false
|
||||
cleanDestinationFolder: true
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Clean up ${{ dependency }}
|
||||
|
||||
@@ -33,7 +33,6 @@ parameters:
|
||||
steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
${{ if eq(parameters.buildType, 'specific') }}:
|
||||
buildType: specific
|
||||
|
||||
@@ -23,14 +23,13 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
${{ iif(or(eq(parameters.os, 'ubuntu2204'), eq(parameters.os, 'ubuntu2404')), 'sudo apt-get install -y jq', '') }}
|
||||
sudo apt-get install -y jq
|
||||
|
||||
# RESOURCES_REPOSITORIES is a runtime variable (not an env var!) that contains quotations and newlines
|
||||
# So we need to save it to a file to properly preserve its formatting and contents
|
||||
cat <<EOF > resources.repositories
|
||||
$(RESOURCES_REPOSITORIES)
|
||||
EOF
|
||||
echo "Value of resources.repositories:"
|
||||
cat resources.repositories
|
||||
|
||||
IS_TAG_BUILD=$(jq 'has("release_repo")' resources.repositories)
|
||||
@@ -67,6 +66,8 @@ steps:
|
||||
)
|
||||
' resources.repositories)
|
||||
|
||||
manifest_json=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json
|
||||
|
||||
dependencies=()
|
||||
for manifest_file in $(Pipeline.Workspace)/d/**/manifest_*.json; do
|
||||
echo "Processing $manifest_file"
|
||||
@@ -77,10 +78,6 @@ steps:
|
||||
done
|
||||
dependencies_json=$(printf '%s\n' "${dependencies[@]}" | jq -s '.')
|
||||
|
||||
manifest_filename="manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}"
|
||||
echo "##vso[task.setvariable variable=manifest_filename]$manifest_filename"
|
||||
manifest_json=$(Build.ArtifactStagingDirectory)/$manifest_filename.json
|
||||
|
||||
jq -n \
|
||||
--argjson current "$current" \
|
||||
--argjson dependencies "$dependencies_json" \
|
||||
@@ -114,14 +111,8 @@ steps:
|
||||
')
|
||||
dependencies_rows=$(echo $dependencies_rows)
|
||||
echo "##vso[task.setvariable variable=dependencies_rows;]$dependencies_rows"
|
||||
- task: Bash@3
|
||||
displayName: Print manifest.json
|
||||
condition: always()
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
cat $(Build.ArtifactStagingDirectory)/$(manifest_filename).json
|
||||
|
||||
cat $manifest_json
|
||||
- task: Bash@3
|
||||
displayName: Create manifest.html
|
||||
condition: always()
|
||||
@@ -129,10 +120,10 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
manifest_html="$(Build.ArtifactStagingDirectory)/$(manifest_filename).html"
|
||||
manifest_html=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
|
||||
cat <<EOF > $manifest_html
|
||||
<html>
|
||||
<h1>$(manifest_filename)</h1>
|
||||
<h1>Manifest</h1>
|
||||
<h2>Current</h2>
|
||||
<table border="1">
|
||||
<tr>
|
||||
@@ -172,7 +163,7 @@ steps:
|
||||
continueOnError: true
|
||||
inputs:
|
||||
tabName: Manifest
|
||||
reportDir: $(Build.ArtifactStagingDirectory)/$(manifest_filename).html
|
||||
reportDir: $(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
|
||||
- task: Bash@3
|
||||
displayName: Save manifest artifact file name
|
||||
condition: always()
|
||||
@@ -181,5 +172,5 @@ steps:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "$(manifest_filename).html" >> pipelineArtifacts.txt
|
||||
echo "$(manifest_filename).json" >> pipelineArtifacts.txt
|
||||
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
|
||||
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt
|
||||
|
||||
@@ -7,16 +7,17 @@ steps:
|
||||
- task: Bash@3
|
||||
name: downloadCKBuild
|
||||
displayName: Download specific CK build
|
||||
retryCountOnTaskFailure: 3
|
||||
continueOnError: true
|
||||
env:
|
||||
CXX: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
CC: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
script: |
|
||||
AZ_API="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis"
|
||||
GH_API="https://api.github.com/repos/ROCm"
|
||||
ARTIFACT_NAME="composablekernelbuild${{ parameters.gpuTarget }}"
|
||||
EXIT_CODE=0
|
||||
|
||||
# Try to find an Azure build for the specific CK commit called out in MIOpen's requirements.txt
|
||||
@@ -38,15 +39,8 @@ steps:
|
||||
echo "Found specific CK build ID: $CK_BUILD_ID"
|
||||
fi
|
||||
|
||||
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?api-version=7.1"
|
||||
ARTIFACT_URL=$(curl -s $AZURE_URL | \
|
||||
jq --arg gfx "${{ parameters.gpuTarget }}" '
|
||||
.value
|
||||
| map(select(.name | test($gfx)))
|
||||
| max_by(.name | capture("_(?<dropNumber>\\d+)").dropNumber | tonumber)
|
||||
| .resource.downloadUrl
|
||||
' | \
|
||||
tr -d '"')
|
||||
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?artifactName=$ARTIFACT_NAME&api-version=7.1"
|
||||
ARTIFACT_URL=$(curl -s $AZURE_URL | jq '.resource.downloadUrl' | tr -d '"')
|
||||
|
||||
# If using the specific CK commit and it doesn't have any valid artifacts, use latest successful CK build instead
|
||||
if { [[ -z "$ARTIFACT_URL" ]] || [[ "$ARTIFACT_URL" == "null" ]]; } && [[ $EXIT_CODE -eq 0 ]]; then
|
||||
@@ -54,45 +48,17 @@ steps:
|
||||
LATEST_BUILD_URL="$AZ_API/build/builds?definitions=$(COMPOSABLE_KERNEL_PIPELINE_ID)&statusFilter=completed&resultFilter=succeeded&\$top=1&api-version=7.1"
|
||||
CK_BUILD_ID=$(curl -s $LATEST_BUILD_URL | jq '.value[0].id')
|
||||
echo "Found latest CK build ID: $CK_BUILD_ID"
|
||||
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?api-version=7.1"
|
||||
ARTIFACT_URL=$(curl -s $AZURE_URL | \
|
||||
jq --arg os "ubuntu2204" --arg gfx "${{ parameters.gpuTarget }}" '
|
||||
.value
|
||||
| map(select(.name | test($os) and test($gfx)))
|
||||
| max_by(.name | capture("_(?<dropNumber>\\d+)").dropNumber | tonumber)
|
||||
| .resource.downloadUrl
|
||||
' | \
|
||||
tr -d '"')
|
||||
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?artifactName=$ARTIFACT_NAME&api-version=7.1"
|
||||
ARTIFACT_URL=$(curl -s $AZURE_URL | jq '.resource.downloadUrl' | tr -d '"')
|
||||
EXIT_CODE=2
|
||||
fi
|
||||
|
||||
echo "Downloading CK artifact from $ARTIFACT_URL"
|
||||
|
||||
RETRIES=0
|
||||
MAX_RETRIES=5
|
||||
SUCCESS=false
|
||||
while [ $RETRIES -lt $MAX_RETRIES ]; do
|
||||
wget -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip && \
|
||||
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory) && \
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm && \
|
||||
tar -zxvf $(System.ArtifactsDirectory)/composable_kernel*/*.tar.gz -C $(Agent.BuildDirectory)/rocm && \
|
||||
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/composable_kernel*
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
SUCCESS=true
|
||||
echo "Successfully downloaded CK."
|
||||
break
|
||||
else
|
||||
RETRIES=$((RETRIES + 1))
|
||||
echo "Failed to download CK on attempt $RETRIES/$MAX_RETRIES, retrying..."
|
||||
sleep 1
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$SUCCESS" = false ]; then
|
||||
echo "ERROR: failed to download CK after $MAX_RETRIES attempts."
|
||||
exit 1
|
||||
fi
|
||||
wget --tries=5 --waitretry=10 --retry-connrefused -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
|
||||
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory)
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm
|
||||
tar -zxvf $(System.ArtifactsDirectory)/$ARTIFACT_NAME/*.tar.gz -C $(Agent.BuildDirectory)/rocm
|
||||
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/$ARTIFACT_NAME
|
||||
|
||||
if [[ $EXIT_CODE -ne 0 ]]; then
|
||||
BUILD_COMMIT=$(curl -s $AZ_API/build/builds/$CK_BUILD_ID | jq '.sourceVersion' | tr -d '"')
|
||||
@@ -103,3 +69,4 @@ steps:
|
||||
fi
|
||||
echo "Instead used latest CK build $CK_BUILD_ID for commit $BUILD_COMMIT"
|
||||
fi
|
||||
exit $EXIT_CODE
|
||||
|
||||
@@ -23,25 +23,145 @@ variables:
|
||||
value: rocm-ci_high_build_pool
|
||||
- name: ULTRA_BUILD_POOL
|
||||
value: rocm-ci_ultra_build_pool
|
||||
- name: ON_PREM_BUILD_POOL
|
||||
value: rocm-ci_build_pool
|
||||
- name: LARGE_DISK_BUILD_POOL
|
||||
value: rocm-ci_larger_base_disk_pool
|
||||
- name: GFX942_TEST_POOL
|
||||
value: gfx942_test_pool
|
||||
- name: GFX90A_TEST_POOL
|
||||
value: gfx90a_test_pool
|
||||
- name: LATEST_RELEASE_VERSION
|
||||
value: 6.4.3
|
||||
value: 6.4.1
|
||||
- name: REPO_RADEON_VERSION
|
||||
value: 6.4.3
|
||||
value: 6.4.1
|
||||
- name: NEXT_RELEASE_VERSION
|
||||
value: 7.0.0
|
||||
- name: LATEST_RELEASE_TAG
|
||||
value: rocm-6.4.3
|
||||
value: rocm-6.4.1
|
||||
- name: DOCKER_SKIP_GFX
|
||||
value: gfx90a
|
||||
- name: AMDMIGRAPHX_PIPELINE_ID
|
||||
value: 113
|
||||
- name: AMDSMI_PIPELINE_ID
|
||||
value: 99
|
||||
- name: AOMP_EXTRAS_PIPELINE_ID
|
||||
value: 111
|
||||
- name: AOMP_PIPELINE_ID
|
||||
value: 115
|
||||
- name: CLR_PIPELINE_ID
|
||||
value: 145
|
||||
- name: COMPOSABLE_KERNEL_PIPELINE_ID
|
||||
value: 86
|
||||
- name: FLANG_LEGACY_PIPELINE_ID
|
||||
value: 77
|
||||
- name: HALF_PIPELINE_ID
|
||||
value: 101
|
||||
- name: HALF560_PIPELINE_ID
|
||||
value: 68
|
||||
- name: HALF560_BUILD_ID
|
||||
value: 621
|
||||
- name: HIP_PIPELINE_ID
|
||||
value: 93
|
||||
- name: HIP_TESTS_PIPELINE_ID
|
||||
value: 233
|
||||
- name: HIPBLAS_COMMON_PIPELINE_ID
|
||||
value: 223
|
||||
- name: HIPBLAS_PIPELINE_ID
|
||||
value: 87
|
||||
- name: HIPBLASLT_PIPELINE_ID
|
||||
value: 112
|
||||
- name: HIPCUB_PIPELINE_ID
|
||||
value: 277
|
||||
- name: HIPFFT_PIPELINE_ID
|
||||
value: 121
|
||||
- name: HIPFORT_PIPELINE_ID
|
||||
value: 102
|
||||
- name: HIPIFY_PIPELINE_ID
|
||||
value: 92
|
||||
- name: HIPRAND_PIPELINE_ID
|
||||
value: 275
|
||||
- name: HIPSOLVER_PIPELINE_ID
|
||||
value: 84
|
||||
- name: HIPSPARSE_PIPELINE_ID
|
||||
value: 83
|
||||
- name: HIPSPARSELT_PIPELINE_ID
|
||||
value: 104
|
||||
- name: HIPTENSOR_PIPELINE_ID
|
||||
value: 105
|
||||
- name: LLVM_PROJECT_PIPELINE_ID
|
||||
value: 2
|
||||
- name: MIOPEN_PIPELINE_ID
|
||||
value: 108
|
||||
- name: MIVISIONX_PIPELINE_ID
|
||||
value: 80
|
||||
- name: RCCL_PIPELINE_ID
|
||||
value: 107
|
||||
- name: RDC_PIPELINE_ID
|
||||
value: 100
|
||||
- name: ROCAL_PIPELINE_ID
|
||||
value: 151
|
||||
- name: ROCALUTION_PIPELINE_ID
|
||||
value: 89
|
||||
- name: ROCBLAS_PIPELINE_ID
|
||||
value: 85
|
||||
- name: ROCDBGAPI_PIPELINE_ID
|
||||
value: 135
|
||||
- name: ROCDECODE_PIPELINE_ID
|
||||
value: 79
|
||||
- name: ROCFFT_PIPELINE_ID
|
||||
value: 120
|
||||
- name: ROCGDB_PIPELINE_ID
|
||||
value: 134
|
||||
- name: ROCJPEG_PIPELINE_ID
|
||||
value: 262
|
||||
- name: ROCM_BANDWIDTH_TEST_PIPELINE_ID
|
||||
value: 88
|
||||
- name: ROCM_CMAKE_PIPELINE_ID
|
||||
value: 6
|
||||
- name: ROCM_CORE_PIPELINE_ID
|
||||
value: 103
|
||||
- name: ROCM_EXAMPLES_PIPELINE_ID
|
||||
value: 216
|
||||
- name: ROCM_SMI_LIB_PIPELINE_ID
|
||||
value: 96
|
||||
- name: ROCMINFO_PIPELINE_ID
|
||||
value: 91
|
||||
- name: ROCMLIR_PIPELINE_ID
|
||||
value: 229
|
||||
- name: ROCMVALIDATIONSUITE_PIPELINE_ID
|
||||
value: 106
|
||||
- name: ROCPRIM_PIPELINE_ID
|
||||
value: 273
|
||||
- name: ROCPROFILER_COMPUTE_PIPELINE_ID
|
||||
value: 257
|
||||
- name: ROCPROFILER_REGISTER_PIPELINE_ID
|
||||
value: 1
|
||||
- name: ROCPROFILER_SDK_PIPELINE_ID
|
||||
value: 246
|
||||
- name: ROCPROFILER_SYSTEMS_PIPELINE_ID
|
||||
value: 255
|
||||
- name: ROCPROFILER_PIPELINE_ID
|
||||
value: 143
|
||||
- name: ROCPYDECODE_PIPELINE_ID
|
||||
value: 239
|
||||
- name: ROCR_DEBUG_AGENT_PIPELINE_ID
|
||||
value: 136
|
||||
- name: ROCR_RUNTIME_PIPELINE_ID
|
||||
value: 10
|
||||
- name: ROCRAND_PIPELINE_ID
|
||||
value: 274
|
||||
- name: ROCSOLVER_PIPELINE_ID
|
||||
value: 81
|
||||
- name: ROCSPARSE_PIPELINE_ID
|
||||
value: 98
|
||||
- name: ROCTHRUST_PIPELINE_ID
|
||||
value: 276
|
||||
- name: ROCTRACER_PIPELINE_ID
|
||||
value: 141
|
||||
- name: ROCWMMA_PIPELINE_ID
|
||||
value: 109
|
||||
- name: RPP_PIPELINE_ID
|
||||
value: 78
|
||||
- name: TRANSFERBENCH_PIPELINE_ID
|
||||
value: 265
|
||||
|
||||
@@ -1,3 +1,44 @@
|
||||
GovReport
|
||||
MLPerf
|
||||
QKV
|
||||
summarization
|
||||
gpt
|
||||
openai
|
||||
oss
|
||||
MXFP
|
||||
SGLang
|
||||
VMware
|
||||
amd
|
||||
bdf
|
||||
compatiblity
|
||||
csv
|
||||
enum
|
||||
json
|
||||
subproject
|
||||
ROCpd
|
||||
rocpd
|
||||
STL
|
||||
XCCs
|
||||
chiplets
|
||||
hipRTC
|
||||
nvRTC
|
||||
warpSize
|
||||
Datacenter
|
||||
GST
|
||||
IET
|
||||
LTO
|
||||
MX
|
||||
Microscaling
|
||||
NANOO
|
||||
ROCprof
|
||||
affinitization
|
||||
amdclang
|
||||
benefitting
|
||||
demangled
|
||||
inlined
|
||||
microscaling
|
||||
roofline
|
||||
torchtitan
|
||||
AAC
|
||||
ABI
|
||||
ACE
|
||||
@@ -5,9 +46,7 @@ ACEs
|
||||
ACS
|
||||
AccVGPR
|
||||
AccVGPRs
|
||||
AITER
|
||||
ALU
|
||||
AllReduce
|
||||
AMD
|
||||
AMDGPU
|
||||
AMDGPUs
|
||||
@@ -15,7 +54,6 @@ AMDMIGraphX
|
||||
AMI
|
||||
AOCC
|
||||
AOMP
|
||||
AOT
|
||||
AOTriton
|
||||
APBDIS
|
||||
APIC
|
||||
@@ -35,7 +73,6 @@ Andrej
|
||||
Arb
|
||||
Autocast
|
||||
BARs
|
||||
BatchNorm
|
||||
BLAS
|
||||
BMC
|
||||
BabelStream
|
||||
@@ -46,7 +83,6 @@ Bootloader
|
||||
CAS
|
||||
CCD
|
||||
CDNA
|
||||
CGUI
|
||||
CHTML
|
||||
CIFAR
|
||||
CLI
|
||||
@@ -84,13 +120,10 @@ ConnectX
|
||||
CuPy
|
||||
da
|
||||
Dashboarding
|
||||
Dataloading
|
||||
DBRX
|
||||
DDR
|
||||
DF
|
||||
DGEMM
|
||||
DGL
|
||||
DGLGraph
|
||||
dGPU
|
||||
dGPUs
|
||||
DIMM
|
||||
@@ -108,7 +141,6 @@ DataFrame
|
||||
DataLoader
|
||||
DataParallel
|
||||
Debian
|
||||
decompositions
|
||||
DeepSeek
|
||||
DeepSpeed
|
||||
Dependabot
|
||||
@@ -116,15 +148,12 @@ Deprecations
|
||||
DevCap
|
||||
DirectX
|
||||
Dockerfile
|
||||
Dockerized
|
||||
Doxygen
|
||||
dropless
|
||||
ELMo
|
||||
ENDPGM
|
||||
EPYC
|
||||
ESXi
|
||||
EoS
|
||||
fas
|
||||
FBGEMM
|
||||
FFT
|
||||
FFTs
|
||||
@@ -137,12 +166,10 @@ FX
|
||||
Filesystem
|
||||
FindDb
|
||||
Flang
|
||||
FlashAttention
|
||||
FluxBenchmark
|
||||
Fortran
|
||||
Fuyu
|
||||
GALB
|
||||
GAT
|
||||
GCC
|
||||
GCD
|
||||
GCDs
|
||||
@@ -156,7 +183,6 @@ GEMMs
|
||||
GFLOPS
|
||||
GFortran
|
||||
GFXIP
|
||||
GGUF
|
||||
Gemma
|
||||
GiB
|
||||
GIM
|
||||
@@ -171,8 +197,6 @@ GPT
|
||||
GPU
|
||||
GPU's
|
||||
GPUs
|
||||
Graphbolt
|
||||
GraphSage
|
||||
GRBM
|
||||
GenAI
|
||||
GenZ
|
||||
@@ -182,11 +206,9 @@ HBM
|
||||
HCA
|
||||
HGX
|
||||
HIPCC
|
||||
hipDataType
|
||||
HIPExtension
|
||||
HIPIFY
|
||||
HIPification
|
||||
hipification
|
||||
HIPify
|
||||
HPC
|
||||
HPCG
|
||||
@@ -198,11 +220,9 @@ HWE
|
||||
HWS
|
||||
Haswell
|
||||
Higgs
|
||||
href
|
||||
Hyperparameters
|
||||
Huggingface
|
||||
ICD
|
||||
ICT
|
||||
ICV
|
||||
IDE
|
||||
IDEs
|
||||
@@ -237,7 +257,6 @@ KV
|
||||
KVM
|
||||
Karpathy's
|
||||
KiB
|
||||
Kineto
|
||||
Keras
|
||||
Khronos
|
||||
LAPACK
|
||||
@@ -250,7 +269,6 @@ LM
|
||||
LSAN
|
||||
LSan
|
||||
LTS
|
||||
LSTMs
|
||||
LanguageCrossEntropy
|
||||
LoRA
|
||||
MEM
|
||||
@@ -278,7 +296,6 @@ Makefiles
|
||||
Matplotlib
|
||||
Matrox
|
||||
MaxText
|
||||
Megablocks
|
||||
Megatrends
|
||||
Megatron
|
||||
Mellanox
|
||||
@@ -288,13 +305,10 @@ Miniconda
|
||||
MirroredStrategy
|
||||
Mixtral
|
||||
MosaicML
|
||||
MoEs
|
||||
Mpops
|
||||
Multicore
|
||||
Multithreaded
|
||||
MyEnvironment
|
||||
MyST
|
||||
NANOO
|
||||
NBIO
|
||||
NBIOs
|
||||
NCCL
|
||||
@@ -304,7 +318,6 @@ NIC
|
||||
NICs
|
||||
NLI
|
||||
NLP
|
||||
NN
|
||||
NPKit
|
||||
NPS
|
||||
NSP
|
||||
@@ -341,7 +354,6 @@ OpenMPI
|
||||
OpenSSL
|
||||
OpenVX
|
||||
OpenXLA
|
||||
Optim
|
||||
Oversubscription
|
||||
PagedAttention
|
||||
Pallas
|
||||
@@ -366,7 +378,6 @@ PowerEdge
|
||||
PowerShell
|
||||
Pretrained
|
||||
Pretraining
|
||||
Primus
|
||||
Profiler's
|
||||
PyPi
|
||||
Pytest
|
||||
@@ -381,7 +392,6 @@ RDC's
|
||||
RDMA
|
||||
RDNA
|
||||
README
|
||||
Recomputation
|
||||
RHEL
|
||||
RMW
|
||||
RNN
|
||||
@@ -414,13 +424,11 @@ Ryzen
|
||||
SALU
|
||||
SBIOS
|
||||
SCA
|
||||
ScaledGEMM
|
||||
SDK
|
||||
SDMA
|
||||
SDPA
|
||||
SDRAM
|
||||
SENDMSG
|
||||
SGLang
|
||||
SGPR
|
||||
SGPRs
|
||||
SHA
|
||||
@@ -456,8 +464,6 @@ TCI
|
||||
TCIU
|
||||
TCP
|
||||
TCR
|
||||
TensorRT
|
||||
TensorFloat
|
||||
TF
|
||||
TFLOPS
|
||||
TP
|
||||
@@ -465,8 +471,6 @@ TPS
|
||||
TPU
|
||||
TPUs
|
||||
TSME
|
||||
Taichi
|
||||
Taichi's
|
||||
Tagram
|
||||
TensileLite
|
||||
TensorBoard
|
||||
@@ -502,7 +506,6 @@ Unhandled
|
||||
VALU
|
||||
VBIOS
|
||||
VCN
|
||||
verl's
|
||||
VGPR
|
||||
VGPRs
|
||||
VM
|
||||
@@ -532,7 +535,6 @@ Xilinx
|
||||
Xnack
|
||||
Xteam
|
||||
YAML
|
||||
YAMLs
|
||||
YML
|
||||
YModel
|
||||
ZeRO
|
||||
@@ -548,7 +550,6 @@ allocator
|
||||
allocators
|
||||
amdgpu
|
||||
api
|
||||
aten
|
||||
atmi
|
||||
atomics
|
||||
autogenerated
|
||||
@@ -593,7 +594,6 @@ completers
|
||||
composable
|
||||
concretization
|
||||
config
|
||||
configs
|
||||
conformant
|
||||
constructible
|
||||
convolutional
|
||||
@@ -720,7 +720,6 @@ installable
|
||||
interop
|
||||
interprocedural
|
||||
intra
|
||||
intrinsics
|
||||
invariants
|
||||
invocating
|
||||
ipo
|
||||
@@ -739,14 +738,11 @@ linearized
|
||||
linter
|
||||
linux
|
||||
llvm
|
||||
lm
|
||||
localscratch
|
||||
logits
|
||||
lossy
|
||||
macOS
|
||||
matchers
|
||||
maxtext
|
||||
megatron
|
||||
microarchitecture
|
||||
migraphx
|
||||
migratable
|
||||
@@ -805,9 +801,7 @@ preprocessing
|
||||
preprocessor
|
||||
prequantized
|
||||
prerequisites
|
||||
pretrain
|
||||
pretraining
|
||||
primus
|
||||
profiler
|
||||
profilers
|
||||
protobuf
|
||||
@@ -820,7 +814,6 @@ quantile
|
||||
quantizer
|
||||
quasirandom
|
||||
queueing
|
||||
qwen
|
||||
radeon
|
||||
rccl
|
||||
rdc
|
||||
@@ -829,7 +822,6 @@ reStructuredText
|
||||
redirections
|
||||
refactorization
|
||||
reformats
|
||||
reinforcememt
|
||||
repo
|
||||
repos
|
||||
representativeness
|
||||
@@ -837,7 +829,6 @@ req
|
||||
resampling
|
||||
rescaling
|
||||
reusability
|
||||
RLHF
|
||||
roadmap
|
||||
roc
|
||||
rocAL
|
||||
@@ -875,7 +866,6 @@ roctracer
|
||||
rst
|
||||
runtime
|
||||
runtimes
|
||||
ResNet
|
||||
sL
|
||||
scalability
|
||||
scalable
|
||||
@@ -884,7 +874,6 @@ seealso
|
||||
sendmsg
|
||||
seqs
|
||||
serializers
|
||||
sglang
|
||||
shader
|
||||
sharding
|
||||
sigmoid
|
||||
@@ -892,7 +881,6 @@ sm
|
||||
smi
|
||||
softmax
|
||||
spack
|
||||
spmm
|
||||
src
|
||||
stochastically
|
||||
strided
|
||||
@@ -901,7 +889,6 @@ subdirectory
|
||||
subexpression
|
||||
subfolder
|
||||
subfolders
|
||||
submatrix
|
||||
submodule
|
||||
submodules
|
||||
subnet
|
||||
@@ -922,12 +909,10 @@ toolchain
|
||||
toolchains
|
||||
toolset
|
||||
toolsets
|
||||
torchtitan
|
||||
torchvision
|
||||
tqdm
|
||||
tracebacks
|
||||
txt
|
||||
TopK
|
||||
uarch
|
||||
uncached
|
||||
uncacheable
|
||||
@@ -955,7 +940,6 @@ vectorize
|
||||
vectorized
|
||||
vectorizer
|
||||
vectorizes
|
||||
verl
|
||||
virtualize
|
||||
virtualized
|
||||
vjxb
|
||||
|
||||
7503
CHANGELOG.md
7503
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,9 @@ source software compilers, debuggers, and libraries. ROCm is fully integrated in
|
||||
> A new open source build platform for ROCm is under development at
|
||||
> https://github.com/ROCm/TheRock, featuring a unified CMake build with bundled
|
||||
> dependencies, Windows support, and more.
|
||||
>
|
||||
> The instructions below describe the prior process for building from source
|
||||
> which will be replaced once TheRock is mature enough.
|
||||
|
||||
## Getting and Building ROCm from Source
|
||||
|
||||
|
||||
498
RELEASE.md
498
RELEASE.md
@@ -1,498 +0,0 @@
|
||||
<!-- Do not edit this file! -->
|
||||
<!-- This file is autogenerated with -->
|
||||
<!-- tools/autotag/tag_script.py -->
|
||||
<!-- Disable lints since this is an auto-generated file. -->
|
||||
<!-- markdownlint-disable blanks-around-headers -->
|
||||
<!-- markdownlint-disable no-duplicate-header -->
|
||||
<!-- markdownlint-disable no-blanks-blockquote -->
|
||||
<!-- markdownlint-disable ul-indent -->
|
||||
<!-- markdownlint-disable no-trailing-spaces -->
|
||||
<!-- markdownlint-disable reference-links-images -->
|
||||
<!-- markdownlint-disable no-missing-space-atx -->
|
||||
<!-- spellcheck-disable -->
|
||||
# ROCm 6.4.3 release notes
|
||||
|
||||
The release notes provide a summary of notable changes since the previous ROCm release.
|
||||
|
||||
- [Release highlights](#release-highlights)
|
||||
|
||||
- [Operating system and hardware support changes](#operating-system-and-hardware-support-changes)
|
||||
|
||||
- [ROCm components versioning](#rocm-components)
|
||||
|
||||
- [Detailed component changes](#detailed-component-changes)
|
||||
|
||||
- [ROCm known issues](#rocm-known-issues)
|
||||
|
||||
- [ROCm upcoming changes](#rocm-upcoming-changes)
|
||||
|
||||
```{note}
|
||||
If you’re using AMD Radeon™ PRO or Radeon GPUs in a workstation setting with a display connected, see the [Use ROCm on Radeon GPUs](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility/native_linux/native_linux_compatibility.html)
|
||||
documentation to verify compatibility and system requirements.
|
||||
```
|
||||
|
||||
## Release highlights
|
||||
|
||||
ROCm 6.4.3 is a quality release that resolves the following issues. For changes to individual components, see [Detailed component changes](#detailed-component-changes).
|
||||
|
||||
### AMDGPU driver updates
|
||||
|
||||
* Resolved an issue causing performance degradation in communication operations, caused by increased latency in certain RCCL applications. The fix prevents unnecessary queue eviction during the fork process.
|
||||
* Fixed an issue in the AMDGPU driver’s scheduler constraints that could cause queue preemption to fail during workload execution.
|
||||
|
||||
### ROCm SMI update
|
||||
* Fixed the failure to load GPU data like System Clock (SCLK) by adjusting the logic for retrieving GPU board voltage.
|
||||
|
||||
### ROCm documentation updates
|
||||
|
||||
ROCm documentation continues to be updated to provide clearer and more comprehensive guidance for a wider variety of user needs and use cases.
|
||||
|
||||
* [Tutorials for AI developers](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/) have been expanded with the following five new tutorials:
|
||||
* Inference tutorials
|
||||
* [ChatQnA vLLM deployment and performance evaluation](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/inference/opea_deployment_and_evaluation.html)
|
||||
* [Text-to-video generation with ComfyUI](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/inference/t2v_comfyui_radeon.html)
|
||||
* [DeepSeek Janus Pro on CPU or GPU](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/inference/deepseek_janus_cpu_gpu.html)
|
||||
* [DeepSeek-R1 with vLLM V1](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/inference/vllm_v1_DSR1.html)
|
||||
* GPU development and optimization tutorial: [MLA decoding kernel of AITER library](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/gpu_dev_optimize/aiter_mla_decode_kernel.html)
|
||||
|
||||
For more information about the changes, see [Changelog for the AI Developer Hub](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/changelog.html).
|
||||
|
||||
* ROCm provides a comprehensive ecosystem for deep learning development. For more details, see [Deep learning frameworks for ROCm](https://rocm.docs.amd.com/en/docs-6.4.3/how-to/deep-learning-rocm.html). AMD ROCm adds support for the following deep learning framework:
|
||||
|
||||
* Megablocks is a light-weight library for mixture-of-experts (MoE) training. The core of the system is efficient "dropless-MoE" and standard MoE layers. Megablocks is integrated with Megatron-LM, where data and pipeline parallel training of MoEs is supported. It is currently supported on ROCm 6.3.0. For more information, see [Megablocks compatibility](https://rocm.docs.amd.com/en/docs-6.4.3/compatibility/ml-compatibility/megablocks-compatibility.html).
|
||||
|
||||
* The [Data types and precision support](https://rocm.docs.amd.com/en/latest/reference/precision-support.html) topic now includes new hardware and library support information.
|
||||
|
||||
## Operating system and hardware support changes
|
||||
|
||||
Operating system and hardware support remain unchanged in this release.
|
||||
|
||||
See the [Compatibility
|
||||
matrix](../../docs/compatibility/compatibility-matrix.rst)
|
||||
for more information about operating system and hardware compatibility.
|
||||
|
||||
## ROCm components
|
||||
|
||||
The following table lists the versions of ROCm components for ROCm 6.4.3.
|
||||
Click {fab}`github` to go to the component's source code on GitHub.
|
||||
|
||||
<div class="pst-scrollable-table-container">
|
||||
<table id="rocm-rn-components" class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Category</th>
|
||||
<th>Group</th>
|
||||
<th>Name</th>
|
||||
<th>Version</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<colgroup>
|
||||
<col span="1">
|
||||
<col span="1">
|
||||
</colgroup>
|
||||
<tbody class="rocm-components-libs rocm-components-ml">
|
||||
<tr>
|
||||
<th rowspan="9">Libraries</th>
|
||||
<th rowspan="9">Machine learning and computer vision</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/composable_kernel/en/docs-6.4.3/index.html">Composable Kernel</a></td>
|
||||
<td>1.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/composable_kernel"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/AMDMIGraphX/en/docs-6.4.3/index.html">MIGraphX</a></td>
|
||||
<td>2.12.0</td>
|
||||
<td><a href="https://github.com/ROCm/AMDMIGraphX"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/MIOpen/en/docs-6.4.3/index.html">MIOpen</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/MIOpen"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/MIVisionX/en/docs-6.4.3/index.html">MIVisionX</a></td>
|
||||
<td>3.2.0</td>
|
||||
<td><a href="https://github.com/ROCm/MIVisionX"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocAL/en/docs-6.4.3/index.html">rocAL</a></td>
|
||||
<td>2.2.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocAL"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocDecode/en/docs-6.4.3/index.html">rocDecode</a></td>
|
||||
<td>0.10.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocDecode"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocJPEG/en/docs-6.4.3/index.html">rocJPEG</a></td>
|
||||
<td>0.8.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocJPEG"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocPyDecode/en/docs-6.4.3/index.html">rocPyDecode</a></td>
|
||||
<td>0.3.1</td>
|
||||
<td><a href="https://github.com/ROCm/rocPyDecode"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rpp/en/docs-6.4.3/index.html">RPP</a></td>
|
||||
<td>1.9.10</td>
|
||||
<td><a href="https://github.com/ROCm/rpp"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-libs rocm-components-communication tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="2"></th>
|
||||
<th rowspan="2">Communication</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rccl/en/docs-6.4.3/index.html">RCCL</a></td>
|
||||
<td>2.22.3</td>
|
||||
<td><a href="https://github.com/ROCm/rccl"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocSHMEM/en/docs-6.4.3/index.html">rocSHMEM</a></td>
|
||||
<td>2.0.1</td>
|
||||
<td><a href="https://github.com/ROCm/rocSHMEM"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-libs rocm-components-math tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="16"></th>
|
||||
<th rowspan="16">Math</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.4.3/index.html">hipBLAS</a></td>
|
||||
<td>2.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipBLAS"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipBLASLt/en/docs-6.4.3/index.html">hipBLASLt</a></td>
|
||||
<td>0.12.1</td>
|
||||
<td><a href="https://github.com/ROCm/hipBLASLt"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipFFT/en/docs-6.4.3/index.html">hipFFT</a></td>
|
||||
<td>1.0.18</td>
|
||||
<td><a href="https://github.com/ROCm/hipFFT"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipfort/en/docs-6.4.3/index.html">hipfort</a></td>
|
||||
<td>0.6.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipfort"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipRAND/en/docs-6.4.3/index.html">hipRAND</a></td>
|
||||
<td>2.12.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipRAND"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipSOLVER/en/docs-6.4.3/index.html">hipSOLVER</a></td>
|
||||
<td>2.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipSPARSE/en/docs-6.4.3/index.html">hipSPARSE</a></td>
|
||||
<td>3.2.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipSPARSE"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipSPARSELt/en/docs-6.4.3/index.html">hipSPARSELt</a></td>
|
||||
<td>0.2.3</td>
|
||||
<td><a href="https://github.com/ROCm/hipSPARSELt"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocALUTION/en/docs-6.4.3/index.html">rocALUTION</a></td>
|
||||
<td>3.2.3</td>
|
||||
<td><a href="https://github.com/ROCm/rocALUTION"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocBLAS/en/docs-6.4.3/index.html">rocBLAS</a></td>
|
||||
<td>4.4.1</td></td>
|
||||
<td><a href="https://github.com/ROCm/rocBLAS"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocFFT/en/docs-6.4.3/index.html">rocFFT</a></td>
|
||||
<td>1.0.32</td>
|
||||
<td><a href="https://github.com/ROCm/rocFFT"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocRAND/en/docs-6.4.3/index.html">rocRAND</a></td>
|
||||
<td>3.3.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocRAND"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocSOLVER/en/docs-6.4.3/index.html">rocSOLVER</a></td>
|
||||
<td>3.28.2</td>
|
||||
<td><a href="https://github.com/ROCm/rocSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocSPARSE/en/docs-6.4.3/index.html">rocSPARSE</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocSPARSE"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocWMMA/en/docs-6.4.3/index.html">rocWMMA</a></td>
|
||||
<td>1.7.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocWMMA"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/Tensile/en/docs-6.4.3/src/index.html">Tensile</a></td>
|
||||
<td>4.43.0</td>
|
||||
<td><a href="https://github.com/ROCm/Tensile"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-libs rocm-components-primitives tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="4"></th>
|
||||
<th rowspan="4">Primitives</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipCUB/en/docs-6.4.3/index.html">hipCUB</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipCUB"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipTensor/en/docs-6.4.3/index.html">hipTensor</a></td>
|
||||
<td>1.5.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipTensor"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocPRIM/en/docs-6.4.3/index.html">rocPRIM</a></td>
|
||||
<td>3.4.1</td>
|
||||
<td><a href="https://github.com/ROCm/rocPRIM"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocThrust/en/docs-6.4.3/index.html">rocThrust</a></td>
|
||||
<td>3.3.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocThrust"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-tools rocm-components-system tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="7">Tools</th>
|
||||
<th rowspan="7">System management</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/amdsmi/en/docs-6.4.3/index.html">AMD SMI</a></td>
|
||||
<td>25.5.1</a></td>
|
||||
<td><a href="https://github.com/ROCm/amdsmi"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rdc/en/docs-6.4.3/index.html">ROCm Data Center Tool</a></td>
|
||||
<td>0.3.0</td>
|
||||
<td><a href="https://github.com/ROCm/rdc"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocminfo/en/docs-6.4.3/index.html">rocminfo</a></td>
|
||||
<td>1.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocminfo"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocm_smi_lib/en/docs-6.4.3/index.html">ROCm SMI</a></td>
|
||||
<td>7.5.0 ⇒ <a href="#rocm-smi-7-7-0">7.7.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocm_smi_lib"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/docs-6.4.3/index.html">ROCm Validation Suite</a></td>
|
||||
<td>1.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCmValidationSuite"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-tools rocm-components-perf">
|
||||
<tr>
|
||||
<th rowspan="6"></th>
|
||||
<th rowspan="6">Performance</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocm_bandwidth_test/en/docs-6.4.3/index.html">ROCm Bandwidth
|
||||
Test</a></td>
|
||||
<td>1.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocm_bandwidth_test/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-compute/en/docs-6.4.3/index.html">ROCm Compute Profiler</a></td>
|
||||
<td>3.1.1</td>
|
||||
<td><a href="https://github.com/ROCm/rocprofiler-compute"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-systems/en/docs-6.4.3/index.html">ROCm Systems Profiler</a></td>
|
||||
<td>1.0.2</td>
|
||||
<td><a href="https://github.com/ROCm/rocprofiler-systems"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.3/index.html">ROCProfiler</a></td>
|
||||
<td>2.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCProfiler/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/docs-6.4.3/index.html">ROCprofiler-SDK</a></td>
|
||||
<td>0.6.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocprofiler-sdk/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr >
|
||||
<td><a href="https://rocm.docs.amd.com/projects/roctracer/en/docs-6.4.3/index.html">ROCTracer</a></td>
|
||||
<td>4.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCTracer/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-tools rocm-components-dev">
|
||||
<tr>
|
||||
<th rowspan="5"></th>
|
||||
<th rowspan="5">Development</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIPIFY/en/docs-6.4.3/index.html">HIPIFY</a></td>
|
||||
<td>19.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/HIPIFY/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCdbgapi/en/docs-6.4.3/index.html">ROCdbgapi</a></td>
|
||||
<td>0.77.2</td>
|
||||
<td><a href="https://github.com/ROCm/ROCdbgapi/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCmCMakeBuildTools/en/docs-6.4.3/index.html">ROCm CMake</a></td>
|
||||
<td>0.14.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocm-cmake/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCgdb/en/docs-6.4.3/index.html">ROCm Debugger (ROCgdb)</a>
|
||||
</td>
|
||||
<td>15.2</td>
|
||||
<td><a href="https://github.com/ROCm/ROCgdb/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocr_debug_agent/en/docs-6.4.3/index.html">ROCr Debug Agent</a>
|
||||
</td>
|
||||
<td>2.0.4</td>
|
||||
<td><a href="https://github.com/ROCm/rocr_debug_agent/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-compilers tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="2" colspan="2">Compilers</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIPCC/en/docs-6.4.3/index.html">HIPCC</a></td>
|
||||
<td>1.1.1</td>
|
||||
<td><a href="https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.3/index.html">llvm-project</a></td>
|
||||
<td>19.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/llvm-project/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-runtimes tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="2" colspan="2">Runtimes</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.3/index.html">HIP</a></td>
|
||||
<td>6.4.3</td>
|
||||
<td><a href="https://github.com/ROCm/HIP/"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCR-Runtime/en/docs-6.4.3/index.html">ROCr Runtime</a></td>
|
||||
<td>1.15.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCR-Runtime/"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
## Detailed component changes
|
||||
|
||||
The following sections describe key changes to ROCm components.
|
||||
|
||||
```{note}
|
||||
For a historical overview of ROCm component updates, see the {doc}`ROCm consolidated changelog </release/changelog>`.
|
||||
```
|
||||
|
||||
### **ROCm SMI** (7.7.0)
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for getting the GPU Board voltage.
|
||||
|
||||
```{note}
|
||||
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
## ROCm known issues
|
||||
|
||||
ROCm known issues are noted on {fab}`github` [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). For known
|
||||
issues related to individual components, review the [Detailed component changes](#detailed-component-changes).
|
||||
|
||||
## ROCm upcoming changes
|
||||
|
||||
The following changes to the ROCm software stack are anticipated for future releases.
|
||||
|
||||
### AMD SMI migration to AMDGPU driver repository
|
||||
|
||||
In a future release, [AMD SMI](https://github.com/ROCm/amdsmi) will be relocated from the ROCm organization repository to a new AMDTools repository to better align with its system-level functionality. `amd-smi-lib` will no longer be included in the `rocm-developer-tools` meta-package included with your standard ROCm installation. Instead, it will be packaged with the AMDGPU driver installation.
|
||||
|
||||
### ROCm SMI deprecation
|
||||
|
||||
[ROCm SMI](https://github.com/ROCm/rocm_smi_lib) will be phased out in an
|
||||
upcoming ROCm release and will enter maintenance mode. After this transition,
|
||||
only critical bug fixes will be addressed and no further feature development
|
||||
will take place.
|
||||
|
||||
It's strongly recommended to transition your projects to [AMD
|
||||
SMI](https://github.com/ROCm/amdsmi), the successor to ROCm SMI. AMD SMI
|
||||
includes all the features of the ROCm SMI and will continue to receive regular
|
||||
updates, new functionality, and ongoing support. For more information on AMD
|
||||
SMI, see the [AMD SMI documentation](https://rocm.docs.amd.com/projects/amdsmi/en/latest/).
|
||||
|
||||
### ROCTracer, ROCProfiler, rocprof, and rocprofv2 deprecation
|
||||
|
||||
Development and support for ROCTracer, ROCProfiler, `rocprof`, and `rocprofv2` are being phased out in favor of ROCprofiler-SDK in upcoming ROCm releases. Starting with ROCm 6.4, only critical defect fixes will be addressed for older versions of the profiling tools and libraries. All users are encouraged to upgrade to the latest version of the ROCprofiler-SDK library and the (`rocprofv3`) tool to ensure continued support and access to new features. ROCprofiler-SDK is still in beta today and will be production-ready in a future ROCm release.
|
||||
|
||||
It's anticipated that ROCTracer, ROCProfiler, `rocprof`, and `rocprofv2` will reach end-of-life by future releases, aligning with Q1 of 2026.
|
||||
|
||||
### AMDGPU wavefront size compiler macro deprecation
|
||||
|
||||
Access to the wavefront size as a compile-time constant via the `__AMDGCN_WAVEFRONT_SIZE`
|
||||
and `__AMDGCN_WAVEFRONT_SIZE__` macros or the `constexpr warpSize` variable is deprecated
|
||||
and will be disabled in a future release.
|
||||
|
||||
* The `__AMDGCN_WAVEFRONT_SIZE__` macro and `__AMDGCN_WAVEFRONT_SIZE` alias will be removed in an upcoming release.
|
||||
It is recommended to remove any use of this macro. For more information, see
|
||||
[AMDGPU support](https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.3/LLVM/clang/html/AMDGPUSupport.html).
|
||||
* `warpSize` will only be available as a non-`constexpr` variable. Where required,
|
||||
the wavefront size should be queried via the `warpSize` variable in device code,
|
||||
or via `hipGetDeviceProperties` in host code. Neither of these will result in a compile-time constant. For more information, see [warpSize](https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.3/how-to/hip_cpp_language_extensions.html#warpsize).
|
||||
* For cases where compile-time evaluation of the wavefront size cannot be avoided,
|
||||
uses of `__AMDGCN_WAVEFRONT_SIZE`, `__AMDGCN_WAVEFRONT_SIZE__`, or `warpSize`
|
||||
can be replaced with a user-defined macro or `constexpr` variable with the wavefront
|
||||
size(s) for the target hardware. For example:
|
||||
|
||||
```
|
||||
#if defined(__GFX9__)
|
||||
#define MY_MACRO_FOR_WAVEFRONT_SIZE 64
|
||||
#else
|
||||
#define MY_MACRO_FOR_WAVEFRONT_SIZE 32
|
||||
#endif
|
||||
```
|
||||
|
||||
### HIPCC Perl scripts deprecation
|
||||
|
||||
The HIPCC Perl scripts (`hipcc.pl` and `hipconfig.pl`) will be removed in an upcoming release.
|
||||
|
||||
### Changes to ROCm Object Tooling
|
||||
|
||||
ROCm Object Tooling tools ``roc-obj-ls``, ``roc-obj-extract``, and ``roc-obj`` are
|
||||
deprecated in ROCm 6.4, and will be removed in a future release. Functionality
|
||||
has been added to the ``llvm-objdump --offloading`` tool option to extract all
|
||||
clang-offload-bundles into individual code objects found within the objects
|
||||
or executables passed as input. The ``llvm-objdump --offloading`` tool option also
|
||||
supports the ``--arch-name`` option, and only extracts code objects found with
|
||||
the specified target architecture. See [llvm-objdump](https://llvm.org/docs/CommandGuide/llvm-objdump.html)
|
||||
for more information.
|
||||
|
||||
### HIP runtime API changes
|
||||
|
||||
There are a number of upcoming changes planned for HIP runtime API in an upcoming major release
|
||||
that are not backward compatible with prior releases. Most of these changes increase
|
||||
alignment between HIP and CUDA APIs or behavior. Some of the upcoming changes are to
|
||||
clean up header files, remove namespace collision, and have a clear separation between
|
||||
`hipRTC` and HIP runtime. For more information, see [HIP 7.0 Is Coming: What You Need to Know to Stay Ahead](https://rocm.blogs.amd.com/ecosystems-and-partners/transition-to-hip-7.0-blog/README.html).
|
||||
27
default.xml
27
default.xml
@@ -1,12 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/20250912-17"
|
||||
<default revision="refs/tags/rocm-6.4.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
<!--list of projects for ROCm-->
|
||||
<project name="aqlprofile" />
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="amdsmi" />
|
||||
<project name="rdc" />
|
||||
@@ -40,34 +40,33 @@
|
||||
<project groups="mathlibs" name="MIOpen" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="Tensile" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipBLAS-common" />
|
||||
<project groups="mathlibs" name="hipBLAS" />
|
||||
<project groups="mathlibs" name="hipBLASLt" />
|
||||
<project groups="mathlibs" name="hipCUB" />
|
||||
<project groups="mathlibs" name="hipFFT" />
|
||||
<project groups="mathlibs" name="hipRAND" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipSPARSE" />
|
||||
<project groups="mathlibs" name="hipSPARSELt" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocBLAS" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocJPEG" />
|
||||
<project groups="mathlibs" name="rocm-libraries">
|
||||
<linkfile src="projects/hipcub" dest="hipCUB"/>
|
||||
<linkfile src="projects/rocprim" dest="rocPRIM"/>
|
||||
<linkfile src="projects/hiprand" dest="hipRAND"/>
|
||||
<linkfile src="projects/rocrand" dest="rocRAND"/>
|
||||
<linkfile src="projects/rocthrust" dest="rocThrust"/>
|
||||
<linkfile src="projects/hipblas-common" dest="hipBLAS-common"/>
|
||||
<linkfile src="projects/hipblaslt" dest="hipBLASLt"/>
|
||||
<linkfile src="projects/rocblas" dest="rocBLAS"/>
|
||||
<linkfile src="projects/hipsparselt" dest="hipSPARSELt"/>
|
||||
<linkfile src="projects/rocsparse" dest="rocSPARSE"/>
|
||||
</project>
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocFFT" />
|
||||
<project groups="mathlibs" name="rocPRIM" />
|
||||
<project groups="mathlibs" name="rocRAND" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocSPARSE" />
|
||||
<project groups="mathlibs" name="rocThrust" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
|
||||
@@ -1,147 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="ROCm licensing terms">
|
||||
<meta name="keywords" content="license, licensing terms">
|
||||
</head>
|
||||
|
||||
# ROCm license
|
||||
|
||||
```{include} ../../LICENSE
|
||||
```
|
||||
|
||||
:::{note}
|
||||
The preceding license applies to the [ROCm repository](https://github.com/ROCm/ROCm), which
|
||||
primarily contains documentation. For licenses related to other ROCm components, refer to the
|
||||
following section.
|
||||
:::
|
||||
|
||||
## ROCm component licenses
|
||||
|
||||
ROCm is released by Advanced Micro Devices, Inc. (AMD) and is licensed per component separately.
|
||||
The following table is a list of ROCm components with links to their respective license
|
||||
terms. These components may include third party components subject to
|
||||
additional licenses. Please review individual repositories for more information.
|
||||
|
||||
<!-- spellcheck-disable -->
|
||||
| Component | License |
|
||||
|:---------------------|:-------------------------|
|
||||
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/LICENSE.txt) |
|
||||
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
|
||||
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
|
||||
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
|
||||
| [AQLprofile] | [MIT](https://github.com/ROCm/aqlprofile/blob/amd-staging/LICENSE) |
|
||||
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
|
||||
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
|
||||
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
|
||||
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipamd](https://github.com/ROCm/clr/tree/amd-staging/hipamd) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/hipamd/LICENSE.txt) |
|
||||
| [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
|
||||
| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
|
||||
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
|
||||
| [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
|
||||
| [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
|
||||
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
|
||||
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
|
||||
| [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
|
||||
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
|
||||
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
|
||||
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
|
||||
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
|
||||
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
|
||||
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
|
||||
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
|
||||
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
|
||||
| [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
|
||||
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
|
||||
| [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
|
||||
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v3.0](https://github.com/ROCm/ROCgdb/blob/amd-staging/COPYING3) |
|
||||
| [rocJPEG](https://github.com/ROCm/rocJPEG/) | [MIT](https://github.com/ROCm/rocJPEG/blob/develop/LICENSE) |
|
||||
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
|
||||
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
|
||||
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [MIT](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
|
||||
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
|
||||
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
|
||||
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
|
||||
| [ROCm Compute Profiler](https://github.com/ROCm/rocprofiler-compute) | [MIT](https://github.com/ROCm/rocprofiler-compute/blob/amd-staging/LICENSE) |
|
||||
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/amd-staging/LICENSE) |
|
||||
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
|
||||
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/amd-staging/opencl) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/opencl/LICENSE.txt) |
|
||||
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
|
||||
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/amd-staging/License.txt) |
|
||||
| [ROCm Systems Profiler](https://github.com/ROCm/rocprofiler-systems) | [MIT](https://github.com/ROCm/rocprofiler-systems/blob/amd-staging/LICENSE) |
|
||||
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
|
||||
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
|
||||
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-staging/LICENSE) |
|
||||
| [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) | [MIT](https://github.com/ROCm/rocprofiler-sdk/blob/amd-mainline/LICENSE) |
|
||||
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE.txt) |
|
||||
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
|
||||
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
|
||||
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
|
||||
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
|
||||
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
|
||||
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
|
||||
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
|
||||
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com).
|
||||
Proprietary components are organized in a proprietary subdirectory in the package
|
||||
repositories to distinguish from open sourced packages.
|
||||
|
||||
```{note}
|
||||
The following additional terms and conditions apply to your use of ROCm technical documentation.
|
||||
```
|
||||
|
||||
©2023 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
The information presented in this document is for informational purposes only
|
||||
and may contain technical inaccuracies, omissions, and typographical errors. The
|
||||
information contained herein is subject to change and may be rendered inaccurate
|
||||
for many reasons, including but not limited to product and roadmap changes,
|
||||
component and motherboard version changes, new model and/or product releases,
|
||||
product differences between differing manufacturers, software changes, BIOS
|
||||
flashes, firmware upgrades, or the like. Any computer system has risks of
|
||||
security vulnerabilities that cannot be completely prevented or mitigated. AMD
|
||||
assumes no obligation to update or otherwise correct or revise this information.
|
||||
However, AMD reserves the right to revise this information and to make changes
|
||||
from time to time to the content hereof without obligation of AMD to notify any
|
||||
person of such revisions or changes.
|
||||
|
||||
THIS INFORMATION IS PROVIDED “AS IS.” AMD MAKES NO REPRESENTATIONS OR WARRANTIES
|
||||
WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY
|
||||
INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS INFORMATION. AMD
|
||||
SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF NON-INFRINGEMENT,
|
||||
MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. IN NO EVENT WILL AMD BE
|
||||
LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, INDIRECT, SPECIAL, OR OTHER
|
||||
CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY INFORMATION CONTAINED HEREIN,
|
||||
EVEN IF AMD IS EXPRESSLY ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
AMD, the AMD Arrow logo, ROCm, and combinations thereof are trademarks of
|
||||
Advanced Micro Devices, Inc. Other product names used in this publication are
|
||||
for identification purposes only and may be trademarks of their respective
|
||||
companies.
|
||||
|
||||
### Package licensing
|
||||
|
||||
:::{attention}
|
||||
ROCprof Trace Decoder and AOCC CPU optimizations are provided in binary form, subject to the license agreement enclosed on [GitHub](https://github.com/ROCm/rocprof-trace-decoder/blob/amd-mainline/LICENSE) for ROCprof Trace Decoder, and [Developer Central](https://www.amd.com/en/developer/aocc.html) for AOCC. By using, installing,
|
||||
copying or distributing ROCprof Trace Decoder or AOCC CPU Optimizations, you agree to
|
||||
the terms and conditions of this license agreement. If you do not agree to the
|
||||
terms of this agreement, do not install, copy or use ROCprof Trace Decoder or the
|
||||
AOCC CPU Optimizations.
|
||||
:::
|
||||
|
||||
For the rest of the ROCm packages, you can find the licensing information at the
|
||||
following location: `/opt/rocm/share/doc/<component-name>/` or in the locations
|
||||
specified in the preceding table.
|
||||
|
||||
For example, you can fetch the licensing information of the `amd_comgr`
|
||||
component (Code Object Manager) from the `/opt/rocm/share/doc/amd_comgr/LICENSE.txt` file.
|
||||
55
docs/benchmark-docker/index.rst
Normal file
55
docs/benchmark-docker/index.rst
Normal file
@@ -0,0 +1,55 @@
|
||||
***************************************************
|
||||
AI training and inference performance with ROCm 7.0
|
||||
***************************************************
|
||||
|
||||
AMD ROCm is an open-source software platform optimized to extract HPC and AI
|
||||
workload performance from AMD Instinct™ accelerators and GPUs while maintaining
|
||||
compatibility with industry software frameworks.
|
||||
|
||||
.. note::
|
||||
|
||||
ROCm 7.0 is now available. See the documentation at `ROCm 7.0 documentation
|
||||
<https://rocm.docs.amd.com/en/docs-7.0.0/>`__.
|
||||
|
||||
This documentation accompanies preview Docker images designed to reproduce
|
||||
training and inference performance on AMD Instinct™ MI355X, MI350X, and MI300X
|
||||
series accelerators with ROCm 7.0. The images provide the 7.0 release of the
|
||||
ROCm software stack and are targeted at users evaluating AI inference workloads
|
||||
using next-generation AMD accelerators. See the Docker image repository at
|
||||
`rocm/7.0 <https://hub.docker.com/r/rocm/7.0/>`__.
|
||||
|
||||
.. important::
|
||||
|
||||
The following AI workload benchmarks use ROCm 7.0 on AMD Instinct MI355X,
|
||||
MI350X, and MI300X series accelerators.
|
||||
|
||||
For other workloads for MI300X series accelerators, see
|
||||
`Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html>`_.
|
||||
|
||||
.. grid:: 2
|
||||
|
||||
.. grid-item-card:: Training
|
||||
|
||||
* :doc:`training-maxtext-llama-3`
|
||||
|
||||
* :doc:`training-maxtext-mixtral-8x7b`
|
||||
|
||||
* :doc:`training-megatron-lm-llama-3`
|
||||
|
||||
* :doc:`training-mlperf-fine-tuning-llama-2-70b`
|
||||
|
||||
* :doc:`training-torchtitan-llama-3`
|
||||
|
||||
.. grid-item-card:: Inference
|
||||
|
||||
* :doc:`inference-vllm-llama-3.1-405b-fp4`
|
||||
|
||||
* :doc:`inference-vllm-llama-3.3-70b-fp8`
|
||||
|
||||
* :doc:`inference-vllm-deepseek-r1-fp8`
|
||||
|
||||
* :doc:`inference-vllm-gpt-oss-120b`
|
||||
|
||||
* :doc:`inference-sglang-deepseek-r1-fp4`
|
||||
|
||||
* :doc:`inference-sglang-deepseek-r1-fp8`
|
||||
99
docs/benchmark-docker/inference-sglang-deepseek-r1-fp4.rst
Normal file
99
docs/benchmark-docker/inference-sglang-deepseek-r1-fp4.rst
Normal file
@@ -0,0 +1,99 @@
|
||||
***********************************************
|
||||
Benchmark DeepSeek R1 FP4 inference with SGLang
|
||||
***********************************************
|
||||
|
||||
This section provides instructions to test the inference performance of
|
||||
DeepSeek R1 with FP4 precision via the SGLang serving framework. The
|
||||
accompanying Docker image integrates `ROCm 7.0
|
||||
<https://rocm.docs.amd.com/en/latest/>`__ with SGLang, and is supported only on AMD
|
||||
Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the benchmark.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image
|
||||
<https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
See the model card on Hugging Face at `DeepSeek-R1-MXFP4-Preview
|
||||
<https://huggingface.co/amd/DeepSeek-R1-MXFP4-Preview>`__. This model uses
|
||||
microscaling 4-bit floating point (MXFP4) quantization through `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \
|
||||
huggingface-cli download amd/DeepSeek-R1-0528-MXFP4-Preview --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--user root \
|
||||
--group-add video \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-w /app/ \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--shm-size 64G \
|
||||
--mount type=bind,src=/data,dst=/data \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
-e SGLANG_USE_AITER=1 \
|
||||
rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
python3 -m sglang.launch_server \
|
||||
--model-path amd/DeepSeek-R1-0528-MXFP4-Preview \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--tensor-parallel-size 8 \
|
||||
--trust-remote-code \
|
||||
--chunked-prefill-size 196608 \
|
||||
--mem-fraction-static 0.8 \
|
||||
--disable-radix-cache \
|
||||
--num-continuous-decode-steps 4 \
|
||||
--max-prefill-tokens 196608 \
|
||||
--cuda-graph-max-bs 128 &
|
||||
|
||||
3. Run the benchmark with the following options.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=64
|
||||
num_prompts=128
|
||||
|
||||
python3 -m sglang.bench_serving \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--model amd/DeepSeek-R1-0528-MXFP4-Preview \
|
||||
--dataset-name random \
|
||||
--random-input ${input_tokens} \
|
||||
--random-output ${output_tokens} \
|
||||
--random-range-ratio 1.0 \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompt ${num_prompts}
|
||||
|
||||
132
docs/benchmark-docker/inference-sglang-deepseek-r1-fp8.rst
Normal file
132
docs/benchmark-docker/inference-sglang-deepseek-r1-fp8.rst
Normal file
@@ -0,0 +1,132 @@
|
||||
***********************************************
|
||||
Benchmark DeepSeek R1 FP8 inference with SGLang
|
||||
***********************************************
|
||||
|
||||
This section provides instructions to test the inference performance of DeepSeek R1
|
||||
with FP8 precision via the SGLang serving framework.
|
||||
The accompanying Docker image integrates ROCm 7.0 with SGLang, and is
|
||||
supported on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the benchmark.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the appropriate `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__
|
||||
for your system.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MI355X and MI350X
|
||||
:sync: mi35x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
.. tab-item:: MI300X series
|
||||
:sync: mi30x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi30x-20250915
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
See the model card on Hugging Face at `deepseek-ai/DeepSeek-R1-0528
|
||||
<https://huggingface.co/deepseek-ai/DeepSeek-R1-0528>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \
|
||||
huggingface-cli download deepseek-ai/DeepSeek-R1-0528 --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MI355X and MI350X
|
||||
:sync: mi35x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--user root \
|
||||
--group-add video \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-w /app/ \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--shm-size 64G \
|
||||
--mount type=bind,src=/data,dst=/data \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
-e SGLANG_USE_AITER=1 \
|
||||
rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
.. tab-item:: MI300X series
|
||||
:sync: mi30x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--user root \
|
||||
--group-add video \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-w /app/ \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--shm-size 64G \
|
||||
--mount type=bind,src=/data,dst=/data \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
-e SGLANG_USE_AITER=1 \
|
||||
rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi30x-20250915
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
python3 -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-R1-0528 \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--tensor-parallel-size 8 \
|
||||
--trust-remote-code \
|
||||
--chunked-prefill-size 196608 \
|
||||
--mem-fraction-static 0.8 \
|
||||
--disable-radix-cache \
|
||||
--num-continuous-decode-steps 4 \
|
||||
--max-prefill-tokens 196608 \
|
||||
--cuda-graph-max-bs 128 &
|
||||
|
||||
3. Run the benchmark with the following options.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=64
|
||||
num_prompts=128
|
||||
|
||||
python3 -m sglang.bench_serving \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--model deepseek-ai/DeepSeek-R1-0528 \
|
||||
--dataset-name random \
|
||||
--random-input ${input_tokens} \
|
||||
--random-output ${output_tokens} \
|
||||
--random-range-ratio 1.0 \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompt ${num_prompts}
|
||||
|
||||
122
docs/benchmark-docker/inference-vllm-deepseek-r1-fp8.rst
Normal file
122
docs/benchmark-docker/inference-vllm-deepseek-r1-fp8.rst
Normal file
@@ -0,0 +1,122 @@
|
||||
************************************************
|
||||
Benchmark DeepSeek R1 FP8 inference with vLLM
|
||||
************************************************
|
||||
|
||||
This section provides instructions to test the inference performance of DeepSeek R1
|
||||
with FP8 precision on the vLLM inference engine. The provided Docker image integrates
|
||||
`ROCm 7.0 <https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with vLLM.
|
||||
This benchmark supports AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the benchmark.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`deepseek-ai/DeepSeek-R1-0528 <https://huggingface.co/deepseek-ai/DeepSeek-R1-0528>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=deepseek-ai/DeepSeek-R1-0528
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=deepseek-ai/DeepSeek-R1-0528
|
||||
max_model_len=16384 # Must be >= the input + the output lengths.
|
||||
max_seq_len_to_capture=10240 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024
|
||||
max_num_batched_tokens=131072 # Smaller values may result in better TTFT but worse TPOT / throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
# Note: this flag may not be compatible with MI325X GPUs
|
||||
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
# Note: Using `--kv-cache-dtype fp8` with DeepSeek may cause accuracy issues
|
||||
vllm serve ${model} \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--no-enable-prefix-caching \
|
||||
--max-num-batched-tokens ${max_num_batched_tokens} \
|
||||
--max-model-len ${max_model_len} \
|
||||
--block-size 1 \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
# Run the client benchmark
|
||||
input_tokens=8192
|
||||
output_tokens=1024
|
||||
max_concurrency=4
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
163
docs/benchmark-docker/inference-vllm-gpt-oss-120b.rst
Normal file
163
docs/benchmark-docker/inference-vllm-gpt-oss-120b.rst
Normal file
@@ -0,0 +1,163 @@
|
||||
**********************************************
|
||||
Benchmark GPT OSS 120B inference with vLLM
|
||||
**********************************************
|
||||
|
||||
This section provides instructions to test the inference performance of OpenAI
|
||||
GPT OSS 120B on the vLLM inference engine. The provided Docker
|
||||
image integrates `ROCm 7.0
|
||||
<https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with
|
||||
vLLM. This benchmark supports AMD Instinct MI355X, MI350X, MI325X, and MI300X
|
||||
GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`openai/gpt-oss-120b <https://huggingface.co/openai/gpt-oss-120b>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=openai/gpt-oss-120b
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MI355X and MI350X
|
||||
:sync: mi35x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=openai/gpt-oss-120b
|
||||
max_model_len=10368 # 1.125 x (input sequence length + output sequence length); e.g. 1.125 x (8192 + 1024) = 10368.
|
||||
max_seq_len_to_capture=10368 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024 # Set to max_concurrency of the client to get better throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
export VLLM_ROCM_USE_AITER_FUSED_MOE_A16W4=1
|
||||
|
||||
vllm serve ${model} \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--compilation-config '{"compile_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,256,512,1024,2048,8192] , "cudagraph_capture_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,136,144,152,160,168,176,184,192,200,208,216,224,232,240,248,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,520,528,536,544,552,560,568,576,584,592,600,608,616,624,632,640,648,656,664,672,680,688,696,704,712,720,728,736,744,752,760,768,776,784,792,800,808,816,824,832,840,848,856,864,872,880,888,896,904,912,920,928,936,944,952,960,968,976,984,992,1000,1008,1016,1024,2048,4096,8192] , "cudagraph_mode": "FULL_AND_PIECEWISE"}' \
|
||||
--block-size=64 \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
.. tab-item:: MI325X and MI300X
|
||||
:sync: mi30x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=openai/gpt-oss-120b
|
||||
max_model_len=10368 # 1.125 x (input sequence length + output sequence length); e.g. 1.125 x (8192 + 1024) = 10368.
|
||||
max_seq_len_to_capture=10368 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024 # Set to max_concurrency of the client to get better throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
|
||||
|
||||
# Set this flag for MI300X only; it is not yet compatible with MI325X.
|
||||
# export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
vllm serve ${model} \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--compilation-config '{"cudagraph_mode": "FULL_AND_PIECEWISE"}' \
|
||||
--block-size=64 \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Run the client benchmark
|
||||
model=openai/gpt-oss-120b
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=4
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
193
docs/benchmark-docker/inference-vllm-llama-3.1-405b-fp4.rst
Normal file
193
docs/benchmark-docker/inference-vllm-llama-3.1-405b-fp4.rst
Normal file
@@ -0,0 +1,193 @@
|
||||
************************************************
|
||||
Benchmark Llama 3.3/3.1 FP4 inference with vLLM
|
||||
************************************************
|
||||
|
||||
This section provides instructions to test the inference performance of Llama
|
||||
3.3 70B and Llama 3.1 405B with MXFP4 precision on the vLLM inference engine.
|
||||
The provided Docker image integrates `ROCm 7.0
|
||||
<https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with vLLM.
|
||||
This benchmark supports AMD Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B MXFP4
|
||||
:sync: Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.3-70B-Instruct-MXFP4-Preview <https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview>`__.
|
||||
This model uses FP4 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
.. tab-item:: Llama 3.1 405B MXFP4
|
||||
:sync: Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.1-405B-Instruct-MXFP4-Preview <https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview>`__.
|
||||
This model uses FP4 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B MXFP4
|
||||
:sync: Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. tab-item:: Llama 3.1 405B MXFP4
|
||||
:sync: Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
max_model_len=10240 # Must be >= the input + the output lengths.
|
||||
max_seq_len_to_capture=10240 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024
|
||||
max_num_batched_tokens=131072 # Smaller values may result in better TTFT but worse TPOT / throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
# The following setting is recommended for most configurations:
|
||||
export VLLM_TRITON_FP4_GEMM_USE_ASM=1
|
||||
|
||||
# For tensor parallelism >1 at low concurrency (<= 16 for input length 1024, <= 4 for input length 8192),
|
||||
# uncomment these lines:
|
||||
# export VLLM_TRITON_FP4_GEMM_USE_ASM=0
|
||||
# export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
|
||||
|
||||
# 0 is recommended for most configurations.
|
||||
# 1 (the default) is faster for input lengths of 8192 with concurrency > 16.
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
|
||||
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
vllm serve ${model} \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--kv-cache-dtype fp8 \
|
||||
--gpu-memory-utilization 0.94 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--max-num-batched-tokens ${max_num_batched_tokens} \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B MXFP4
|
||||
:sync: Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. tab-item:: Llama 3.1 405B MXFP4
|
||||
:sync: Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Run the client benchmark
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=64
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
189
docs/benchmark-docker/inference-vllm-llama-3.3-70b-fp8.rst
Normal file
189
docs/benchmark-docker/inference-vllm-llama-3.3-70b-fp8.rst
Normal file
@@ -0,0 +1,189 @@
|
||||
************************************************
|
||||
Benchmark Llama 3.3/3.1 FP8 inference with vLLM
|
||||
************************************************
|
||||
|
||||
This section provides instructions to test the inference performance of Llama
|
||||
3.3 70B and Llama 3.1 405B with FP8 precision on the vLLM inference engine. The provided Docker image integrates
|
||||
`ROCm 7.0 <https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with vLLM.
|
||||
This benchmark supports AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
In the following snippet, set ``HF_TOKEN`` to your access token.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B FP8
|
||||
:sync: Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.3-70B-Instruct-FP8-KV <https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV>`__.
|
||||
This model uses FP8 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
.. tab-item:: Llama 3.1 405B FP8
|
||||
:sync: Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.1-405B-Instruct-FP8-KV <https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV>`__.
|
||||
This model uses FP8 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server. On MI300X and MI325X GPUs, include ``--dtype float16`` in your ``vllm serve`` arguments.
|
||||
This is not necessary on MI350X and MI355X GPUs.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B FP8
|
||||
:sync: Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. tab-item:: Llama 3.1 405B FP8
|
||||
:sync: Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
max_model_len=10240 # Must be >= the input + the output lengths.
|
||||
max_seq_len_to_capture=10240 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024
|
||||
max_num_batched_tokens=131072 # Smaller values may result in better TTFT but worse TPOT / throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
# Note: this flag may not be compatible with MI325X GPUs
|
||||
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
# 0 is recommended for most configurations.
|
||||
# 1 (the default) is faster for input lengths of 8192 with concurrency > 16.
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
|
||||
vllm serve ${model} \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--distributed-executor-backend mp \
|
||||
--kv-cache-dtype fp8 \
|
||||
--gpu-memory-utilization 0.94 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--max-num-batched-tokens ${max_num_batched_tokens} \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B FP8
|
||||
:sync: Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. tab-item:: Llama 3.1 405B FP8
|
||||
:sync: Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Run the client benchmark
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=4
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
122
docs/benchmark-docker/training-maxtext-llama-3.rst
Normal file
122
docs/benchmark-docker/training-maxtext-llama-3.rst
Normal file
@@ -0,0 +1,122 @@
|
||||
*******************************************
|
||||
Benchmark Llama 3 pre-training with MaxText
|
||||
*******************************************
|
||||
|
||||
This page describes how to benchmark Llama 3 8B and 70B pre-training using the
|
||||
MaxText framework. It includes configurations for both
|
||||
FP8 and BF16 precision to measure throughput. The provided Docker
|
||||
image integrates ROCm 7.0 with MaxText
|
||||
-- and is supported on AMD Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/maxtext \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b_env.sh
|
||||
bash llama3_8b_env.sh
|
||||
python3 -m MaxText.train llama3_8b.yml
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b_env.sh
|
||||
bash llama3_8b_env.sh
|
||||
python3 -m MaxText.train llama3_8b.yml quantization=fp8
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_70b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_70b_env.sh
|
||||
bash llama3_70b_env.sh
|
||||
python3 -m MaxText.train llama3_70b.yml
|
||||
|
||||
.. rubric:: Options
|
||||
|
||||
The ``MaxText.train`` script accepts the following options:
|
||||
|
||||
* ``per_device_batch_size``: Per-device batch size
|
||||
|
||||
* ``quantization``: quantization
|
||||
|
||||
* ``max_target_length``: Maximum input token sequence length
|
||||
|
||||
* ``steps``: Number of training iterations to execute
|
||||
|
||||
See this base [config](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/configs/base.yml)
|
||||
for the full list of settings you can change.
|
||||
|
||||
Other supported models
|
||||
======================
|
||||
|
||||
* Llama-2-7B FP8 and BF16
|
||||
|
||||
* Llama-2-70B FP8 and BF16
|
||||
|
||||
* Llama-3.3-70B BF16
|
||||
|
||||
* DeepSeek-V2-Lite FP8 and BF16
|
||||
|
||||
* Mixtral 8x7B FP8 and BF16
|
||||
|
||||
Known issue
|
||||
===========
|
||||
|
||||
Some models and configurations may trigger a "Memory Access Fault" error.
|
||||
Updates to improve stability are planned for upcoming releases.
|
||||
111
docs/benchmark-docker/training-maxtext-mixtral-8x7b.rst
Normal file
111
docs/benchmark-docker/training-maxtext-mixtral-8x7b.rst
Normal file
@@ -0,0 +1,111 @@
|
||||
************************************************
|
||||
Benchmark Mixtral 8x7B pre-training with MaxText
|
||||
************************************************
|
||||
|
||||
This page describes how to benchmark the Mixtral 8x7B pre-training using the
|
||||
MaxText framework. It includes configurations for both
|
||||
FP8 and BF16 precision to measure throughput. The provided Docker
|
||||
image integrates a ROCm 7.0 with MaxText
|
||||
-- and is supported on AMD Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/maxtext \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Mixtral 8x7B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b_env.sh
|
||||
bash mixtral_8x7b_env.sh
|
||||
python3 -m MaxText.train mixtral_8x7b.yml
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b_env.sh
|
||||
bash mixtral_8x7b_env.sh
|
||||
python3 -m MaxText.train mixtral_8x7b.yml quantization=fp8
|
||||
|
||||
.. rubric:: Options
|
||||
|
||||
The ``MaxText.train`` script accepts the following options:
|
||||
|
||||
* ``per_device_batch_size``: Per-device batch size
|
||||
|
||||
* ``quantization``: quantization
|
||||
|
||||
* ``max_target_length``: Maximum input token sequence length
|
||||
|
||||
* ``steps``: Number of training iterations to execute
|
||||
|
||||
See this base [config](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/configs/base.yml)
|
||||
for the full list of settings you can change.
|
||||
|
||||
Other supported models
|
||||
======================
|
||||
|
||||
* Llama-2-7B FP8 and BF16
|
||||
|
||||
* Llama-2-70B FP8 and BF16
|
||||
|
||||
* Llama-3.1-8B FP8 and BF16
|
||||
|
||||
* Llama-3.1-70B BF16
|
||||
|
||||
* Llama-3.3-70B BF16
|
||||
|
||||
* DeepSeek-V2-Lite FP8 and BF16
|
||||
|
||||
Known issue
|
||||
===========
|
||||
|
||||
Some models and configurations may trigger a "Memory Access Fault" error.
|
||||
Updates to improve stability are planned for upcoming releases.
|
||||
171
docs/benchmark-docker/training-megatron-lm-llama-3.rst
Normal file
171
docs/benchmark-docker/training-megatron-lm-llama-3.rst
Normal file
@@ -0,0 +1,171 @@
|
||||
***********************************************
|
||||
Benchmark Llama 3 pre-training with Megatron-LM
|
||||
***********************************************
|
||||
|
||||
This page describes how to benchmark Llama 3 8B and 70B pre-training using the
|
||||
Megatron-LM framework. It includes configurations for both FP8 and BF16
|
||||
precision to measure throughput. The provided Docker image integrates ROCm
|
||||
7.0 with Megatron-LM -- and is supported on AMD Instinct MI355X and MI350X
|
||||
GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/Megatron-LM \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
TEE_OUTPUT=1 \
|
||||
MBS=4 \
|
||||
BS=512 \
|
||||
TP=1 \
|
||||
TE_FP8=0 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=8 \
|
||||
TOTAL_ITERS=10 \
|
||||
GEMM_TUNING=1 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
TEE_OUTPUT=1 \
|
||||
MBS=4 \
|
||||
BS=512 \
|
||||
TP=1 \
|
||||
TE_FP8=1 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=8 \
|
||||
TOTAL_ITERS=10 \
|
||||
GEMM_TUNING=0 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CKPT_FORMAT=torch_dist \
|
||||
TEE_OUTPUT=1 \
|
||||
MBS=3 \
|
||||
BS=24 \
|
||||
TP=1 \
|
||||
TE_FP8=0 \
|
||||
FSDP=1 \
|
||||
RECOMPUTE=1 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=70 \
|
||||
TOTAL_ITERS=10 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CKPT_FORMAT=torch_dist \
|
||||
TEE_OUTPUT=1 \
|
||||
RECOMPUTE=1 \
|
||||
MBS=3 \
|
||||
BS=24 \
|
||||
TP=1 \
|
||||
TE_FP8=1 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=70 \
|
||||
FSDP=1 \
|
||||
TOTAL_ITERS=10 \
|
||||
NUM_LAYERS=40 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. rubric:: Options
|
||||
|
||||
The ``train_llama3.sh`` script accepts the following options:
|
||||
|
||||
* ``MBS``: Micro-batch size per GPU
|
||||
|
||||
* ``BS``: Global batch size
|
||||
|
||||
* ``TP``: Tensor parallelism
|
||||
|
||||
* ``SEQ_LENGTH``: Maximum input token sequence length
|
||||
|
||||
* ``TE_FP8``: Toggle to enable FP8
|
||||
|
||||
* ``TOTAL_ITERS``: Number of training iterations to execute
|
||||
|
||||
Other supported models
|
||||
======================
|
||||
|
||||
* Llama-2-7B
|
||||
|
||||
* Llama-3.3-70B
|
||||
|
||||
* DeepSeek-V2-Lite
|
||||
|
||||
* Mixtral-8x7B
|
||||
|
||||
* Qwen-2.5-7B
|
||||
|
||||
* Qwen-2.5-72B
|
||||
|
||||
Known issues
|
||||
============
|
||||
|
||||
- Some models and configurations may trigger a "Memory Access Fault" error.
|
||||
Updates to improve stability are planned for upcoming releases.
|
||||
|
||||
- A regression related to Composable Kernel (CK) may cause errors in certain
|
||||
cases. A fix is planned for an upcoming release.
|
||||
|
||||
- Flash Attention forward may not pick the best performing ASM kernel with batch
|
||||
size greater than 1 and impact model performance.
|
||||
For best performance on MI355X and MI350X GPUs, use the ROCm 7.0 Preview beta Docker
|
||||
(``rocm/7.0-preview:rocm7.0_preview_pytorch_training_mi35x_beta``).
|
||||
@@ -0,0 +1,151 @@
|
||||
**************************************************
|
||||
Benchmark Llama 2 70B LoRA fine-tuning with MLPerf
|
||||
**************************************************
|
||||
|
||||
This guide provides instructions to benchmark LoRA fine-tuning on the Llama 2
|
||||
70B model. The benchmark follows the MLPerf training submission for
|
||||
long-document summarization using the GovReport dataset.
|
||||
|
||||
The provided Docker image integrates the `ROCm 7.0 <https://rocm.docs.amd.com/en/latest/>`__
|
||||
software stack and is optimized for AMD Instinct MI355X, MI350X, MI325X, and MI300X
|
||||
GPUs.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
1. Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915
|
||||
|
||||
2. Copy the benchmark scripts from the container to your host. These scripts
|
||||
are used to configure the environment and launch the benchmark.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
container_id=$(docker create rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915) && \
|
||||
docker cp $container_id:/workspace/code/runtime_tunables.sh . && \
|
||||
docker cp $container_id:/workspace/code/run_with_docker.sh . && \
|
||||
docker cp $container_id:/workspace/code/config_MI355X_1x8x1.sh . && \
|
||||
docker rm $container_id
|
||||
|
||||
.. note::
|
||||
|
||||
The ``config_*.sh`` files contain system-specific hyperparameters used in
|
||||
the :ref:`run step <system-config>`. You will need to copy the one that
|
||||
matches your hardware configuration.
|
||||
|
||||
Prepare the GovReport dataset
|
||||
=============================
|
||||
|
||||
This benchmark uses the Llama 2 70B model with fused QKV and the GovReport dataset.
|
||||
GovReport is a dataset for long document summarization that consists of
|
||||
reports written by government research agencies. The dataset hosted on the
|
||||
MLPerf drive is already tokenized and packed so that each sequence has
|
||||
length 8192.
|
||||
|
||||
1. Download and preprocess the dataset.
|
||||
|
||||
Start the Docker container by mounting the volume you want to use for
|
||||
downloading the data under ``/data`` within the container. This example uses
|
||||
``/data/mlperf_llama2`` as the host's download directory:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--net=host \
|
||||
--uts=host \
|
||||
--ipc=host \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--privileged \
|
||||
--security-opt=seccomp=unconfined \
|
||||
--volume=/data/mlperf_llama2:/data \
|
||||
--volume=/data/mlperf_llama2/model:/ckpt \
|
||||
rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915
|
||||
|
||||
2. From within the container, run the preparation script. This will download and
|
||||
preprocess the dataset and model.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
bash ./scripts/prepare_data_and_model.sh
|
||||
|
||||
3. Verify the preprocessed files. After the script completes, check for the
|
||||
following files in the mounted directories.
|
||||
|
||||
After preprocessing, you should see the following files in the ``/data/model`` directory:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
<hash>_tokenizer.model llama2-70b.nemo
|
||||
model_config.yaml model_weights
|
||||
|
||||
And the following files in ``/data/data``:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
train.npy validation.npy
|
||||
|
||||
4. Exit the container and return to your host shell.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
exit
|
||||
|
||||
Run the benchmark
|
||||
=================
|
||||
|
||||
With the dataset prepared, you can now configure and run the fine-tuning
|
||||
benchmark from your host machine.
|
||||
|
||||
1. Set the environment variables. These variables point to the directories you used
|
||||
for data, the model, and where the resulting logs should be stored.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export DATADIR=/data/mlperf_llama2
|
||||
export LOGDIR=/data/mlperf_llama2/results
|
||||
export CONT=rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915
|
||||
|
||||
.. tip::
|
||||
|
||||
Ensure the log directory exists and is writable by the container user.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
mkdir -p $LOGDIR
|
||||
sudo chmod -R 777 $LOGDIR
|
||||
|
||||
.. _system-config:
|
||||
|
||||
2. Source the system-specific configuration file. The ``config_*.sh`` files
|
||||
contain optimized hyperparameters for different hardware configurations.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Use the appropriate config
|
||||
source config_MI355X_1x8x1.sh
|
||||
|
||||
3. To perform a single training run, use the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export NEXP=1
|
||||
bash run_with_docker.sh
|
||||
|
||||
Optionally, to perform 10 consecutive training runs:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export NEXP=10
|
||||
bash run_with_docker.sh
|
||||
|
||||
.. note::
|
||||
|
||||
To optimize performance, the ``run_with_docker.sh`` script automatically
|
||||
executes ``runtime_tunables.sh`` to apply system-level optimizations
|
||||
before starting the training job.
|
||||
|
||||
Upon run completion, the logs will be available under ``$LOGDIR``.
|
||||
113
docs/benchmark-docker/training-torchtitan-llama-3.rst
Normal file
113
docs/benchmark-docker/training-torchtitan-llama-3.rst
Normal file
@@ -0,0 +1,113 @@
|
||||
**********************************************
|
||||
Benchmark Llama 3 pre-training with torchtitan
|
||||
**********************************************
|
||||
|
||||
This page describes how to benchmark Llama 3 8B and 70B pre-training using
|
||||
torchtitan. The provided Docker image integrates
|
||||
ROCm 7.0 with torchtitan -- and is tailored for AMD Instinct MI355X and MI350X
|
||||
GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/torchtitan \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Download the Llama 3 tokenizer. Make sure to set ``HF_TOKEN`` using
|
||||
a valid Hugging Face access token with Llama model permissions.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
:sync: 8b
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HF_TOKEN=#{your huggingface token with Llama 3 access}
|
||||
python3 scripts/download_tokenizer.py \
|
||||
--repo_id meta-llama/Meta-Llama-3-8B \
|
||||
--tokenizer_path "original" \
|
||||
--hf_token=${HF_TOKEN}
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
:sync: 70b
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HF_TOKEN=#{your huggingface token with Llama 3 access}
|
||||
python3 scripts/download_tokenizer.py \
|
||||
--repo_id meta-llama/Meta-Llama-3-70B \
|
||||
--tokenizer_path "original" \
|
||||
--hf_token=${HF_TOKEN}
|
||||
|
||||
3. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
:sync: 8b
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_8b_fsdp_bf16.toml" ./run_train.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_8b_fsdp_fp8.toml" ./run_train.sh
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
:sync: 70b
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_70b_fsdp_bf16.toml" ./run_train.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_70b_fsdp_fp8.toml" ./run_train.sh
|
||||
@@ -1,133 +0,0 @@
|
||||
ROCm Version,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat]_,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat]_,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,N/A,N/A,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
@@ -1,253 +0,0 @@
|
||||
.. meta::
|
||||
:description: ROCm compatibility matrix
|
||||
:keywords: GPU, architecture, hardware, compatibility, system, requirements, components, libraries
|
||||
|
||||
**************************************************************************************
|
||||
Compatibility matrix
|
||||
**************************************************************************************
|
||||
|
||||
Use this matrix to view the ROCm compatibility and system requirements across successive major and minor releases.
|
||||
|
||||
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
|
||||
|
||||
Accelerators and GPUs listed in the following table support compute workloads (no display
|
||||
information or graphics). If you’re using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
|
||||
workloads, see the `Use ROCm on Radeon GPU documentation
|
||||
<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
|
||||
compatibility and system requirements.
|
||||
|
||||
.. |br| raw:: html
|
||||
|
||||
<br/>
|
||||
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.4.3", "6.4.2", "6.3.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.5, 9.4"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10
|
||||
,"SLES 15 SP7, SP6","SLES 15 SP7, SP6","SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
|
||||
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
|
||||
,.. _architecture-support-compatibility-matrix:,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,
|
||||
,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,gfx1201 [#RDNA-OS]_,
|
||||
,gfx1200 [#RDNA-OS]_,gfx1200 [#RDNA-OS]_,
|
||||
,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
|
||||
,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2
|
||||
CUB,2.5.0,2.5.0,2.3.2
|
||||
,,,
|
||||
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
|
||||
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,N/A
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.3.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60300
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,18.0.0.24491
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.3.42131
|
||||
:doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.3.42131
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
|
||||
.. _OS-kernel-versions:
|
||||
|
||||
Operating systems, kernel and Glibc versions
|
||||
*********************************************
|
||||
|
||||
Use this lookup table to confirm which operating system and kernel versions are supported with ROCm.
|
||||
|
||||
.. csv-table::
|
||||
:header: "OS", "Version", "Kernel", "Glibc"
|
||||
:widths: 40, 20, 30, 20
|
||||
:stub-columns: 1
|
||||
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
|
||||
,,
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
|
||||
,9.5, 5.14+, 2.34
|
||||
,9.4, 5.14+, 2.34
|
||||
,9.3, 5.14+, 2.34
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
|
||||
,8.9, 4.18.0, 2.28
|
||||
,,
|
||||
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.11.0+, 2.38
|
||||
,15 SP6, "6.5.0+, 6.4.0", 2.38
|
||||
,15 SP5, 5.14.21, 2.31
|
||||
,,
|
||||
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
|
||||
,8, 5.15.0 (UEK), 2.28
|
||||
,,
|
||||
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
|
||||
,,
|
||||
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
|
||||
,,
|
||||
|
||||
.. note::
|
||||
|
||||
* See `Red Hat Enterprise Linux Release Dates <https://access.redhat.com/articles/3078>`_ to learn about the specific kernel versions supported on Red Hat Enterprise Linux (RHEL).
|
||||
* See `List of SUSE Linux Enterprise Server kernel <https://www.suse.com/support/kb/doc/?id=000019587>`_ to learn about the specific kernel version supported on SUSE Linux Enterprise Server (SLES).
|
||||
..
|
||||
Footnotes and ref anchors in below historical tables should be appended with "-past-60", to differentiate from the
|
||||
footnote references in the above, latest, compatibility matrix. It also allows to easily find & replace.
|
||||
An easy way to work is to download the historical.CSV file, and update open it in excel. Then when content is ready,
|
||||
delete the columns you don't need, to build the current compatibility matrix to use in above table. Find & replace all
|
||||
instances of "-past-60" to make it ready for above table.
|
||||
|
||||
|
||||
.. _past-rocm-compatibility-matrix:
|
||||
|
||||
Past versions of ROCm compatibility matrix
|
||||
***************************************************
|
||||
|
||||
Expand for full historical view of:
|
||||
|
||||
.. dropdown:: ROCm 6.0 - Present
|
||||
|
||||
You can `download the entire .csv <../downloads/compatibility-matrix-historical-6.0.csv>`_ for offline reference.
|
||||
|
||||
.. csv-table::
|
||||
:file: compatibility-matrix-historical-6.0.csv
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
|
||||
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
|
||||
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
|
||||
.. [#ray_compat] Ray is only supported on ROCm 6.4.1.
|
||||
.. [#llama-cpp_compat] llama.cpp is only supported on ROCm 6.4.0.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
@@ -1,255 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Deep Graph Library (DGL) compatibility
|
||||
:keywords: GPU, DGL compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
DGL compatibility
|
||||
********************************************************************************
|
||||
|
||||
Deep Graph Library `(DGL) <https://www.dgl.ai/>`_ is an easy-to-use, high-performance and scalable
|
||||
Python package for deep learning on graphs. DGL is framework agnostic, meaning
|
||||
if a deep graph model is a component in an end-to-end application, the rest of
|
||||
the logic is implemented using PyTorch.
|
||||
|
||||
* ROCm support for DGL is hosted in the `https://github.com/ROCm/dgl <https://github.com/ROCm/dgl>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/dmlc/dgl <https://github.com/dmlc/dgl>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker images <dgl-docker-compat>` with DGL, PyTorch, and ROCm preinstalled.
|
||||
* See the :doc:`ROCm DGL installation guide <rocm-install-on-linux:install/3rd-party/dgl-install>`
|
||||
to install and get started.
|
||||
|
||||
|
||||
Supported devices
|
||||
================================================================================
|
||||
|
||||
- **Officially Supported**: TF32 with AMD Instinct MI300X (through hipblaslt)
|
||||
- **Partially Supported**: TF32 with AMD Instinct MI250X
|
||||
|
||||
|
||||
.. _dgl-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
DGL can be used for Graph Learning, and building popular graph models like
|
||||
GAT, GCN and GraphSage. Using these we can support a variety of use-cases such as:
|
||||
|
||||
- Recommender systems
|
||||
- Network Optimization and Analysis
|
||||
- 1D (Temporal) and 2D (Image) Classification
|
||||
- Drug Discovery
|
||||
|
||||
Multiple use cases of DGL have been tested and verified.
|
||||
However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
|
||||
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
|
||||
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
|
||||
|
||||
Coverage includes:
|
||||
|
||||
- Single-GPU training/inference
|
||||
- Multi-GPU training
|
||||
|
||||
|
||||
.. _dgl-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `DGL images <https://hub.docker.com/r/rocm/dgl>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
|
||||
Click the |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: DGL Docker image components
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker
|
||||
- DGL
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
|
||||
Key ROCm libraries for DGL
|
||||
================================================================================
|
||||
|
||||
DGL on ROCm depends on specific libraries that affect its features and performance.
|
||||
Using the DGL Docker container or building it with the provided docker file or a ROCm base image is recommended.
|
||||
If you prefer to build it yourself, ensure the following dependencies are installed:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
|
||||
- :version-ref:`"Composable Kernel" rocm_version`
|
||||
- Enables faster execution of core operations like matrix multiplication
|
||||
(GEMM), convolutions and transformations.
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
singular value decompositions (SVD).
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
||||
- :version-ref:`hipSPARSELt rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
|
||||
- :version-ref:`hipTensor rocm_version`
|
||||
- Optimizes for high-performance tensor operations, such as contractions.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Optimizes deep learning primitives such as convolutions, pooling,
|
||||
normalization, and activation functions.
|
||||
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
|
||||
- :version-ref:`MIGraphX rocm_version`
|
||||
- Adds graph-level optimizations, ONNX models and mixed precision support
|
||||
and enable Ahead-of-Time (AOT) Compilation.
|
||||
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
|
||||
- :version-ref:`MIVisionX rocm_version`
|
||||
- Optimizes acceleration for computer vision and AI workloads like
|
||||
preprocessing, augmentation, and inferencing.
|
||||
* - `rocAL <https://github.com/ROCm/rocAL>`_
|
||||
- :version-ref:`rocAL rocm_version`
|
||||
- Accelerates the data pipeline by offloading intensive preprocessing and
|
||||
augmentation tasks. rocAL is part of MIVisionX.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||
Broadcast.
|
||||
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
|
||||
- :version-ref:`rocDecode rocm_version`
|
||||
- Provides hardware-accelerated data decoding capabilities, particularly
|
||||
for image, video, and other dataset formats.
|
||||
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
|
||||
- :version-ref:`rocJPEG rocm_version`
|
||||
- Provides hardware-accelerated JPEG image decoding and encoding.
|
||||
* - `RPP <https://github.com/ROCm/RPP>`_
|
||||
- :version-ref:`RPP rocm_version`
|
||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
|
||||
- :version-ref:`rocWMMA rocm_version`
|
||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||
multiplication (GEMM) and accumulation operations with mixed precision
|
||||
support.
|
||||
|
||||
|
||||
Supported features
|
||||
================================================================================
|
||||
|
||||
Many functions and methods available in DGL Upstream are also supported in DGL ROCm.
|
||||
Instead of listing them all, support is grouped into the following categories to provide a general overview.
|
||||
|
||||
* DGL Base
|
||||
* DGL Backend
|
||||
* DGL Data
|
||||
* DGL Dataloading
|
||||
* DGL DGLGraph
|
||||
* DGL Function
|
||||
* DGL Ops
|
||||
* DGL Sampling
|
||||
* DGL Transforms
|
||||
* DGL Utils
|
||||
* DGL Distributed
|
||||
* DGL Geometry
|
||||
* DGL Mpops
|
||||
* DGL NN
|
||||
* DGL Optim
|
||||
* DGL Sparse
|
||||
|
||||
|
||||
Unsupported features
|
||||
================================================================================
|
||||
|
||||
* Graphbolt
|
||||
* Partial TF32 Support (MI250x only)
|
||||
* Kineto/ ROCTracer integration
|
||||
|
||||
|
||||
Unsupported functions
|
||||
================================================================================
|
||||
|
||||
* ``more_nnz``
|
||||
* ``format``
|
||||
* ``multiprocess_sparse_adam_state_dict``
|
||||
* ``record_stream_ndarray``
|
||||
* ``half_spmm``
|
||||
* ``segment_mm``
|
||||
* ``gather_mm_idx_b``
|
||||
* ``pgexplainer``
|
||||
* ``sample_labors_prob``
|
||||
* ``sample_labors_noprob``
|
||||
@@ -1,314 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: JAX compatibility
|
||||
:keywords: GPU, JAX compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
JAX compatibility
|
||||
*******************************************************************************
|
||||
|
||||
JAX provides a NumPy-like API, which combines automatic differentiation and the
|
||||
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
|
||||
learning at scale.
|
||||
|
||||
JAX uses composable transformations of Python and NumPy through just-in-time
|
||||
(JIT) compilation, automatic vectorization, and parallelization. To learn about
|
||||
JAX, including profiling and optimizations, see the official `JAX documentation
|
||||
<https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.
|
||||
|
||||
ROCm support for JAX is upstreamed, and users can build the official source code
|
||||
with ROCm support:
|
||||
|
||||
- ROCm JAX release:
|
||||
|
||||
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
|
||||
with ROCm and JAX preinstalled.
|
||||
|
||||
- ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_
|
||||
|
||||
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
to get started.
|
||||
|
||||
- Official JAX release:
|
||||
|
||||
- Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_
|
||||
|
||||
- See the `AMD GPU (Linux) installation section
|
||||
<https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in
|
||||
the JAX documentation.
|
||||
|
||||
.. note::
|
||||
|
||||
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
quarterly alongside new ROCm releases. These images undergo full AMD testing.
|
||||
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
follow upstream JAX releases and use the latest available ROCm version.
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
|
||||
blog explores the implementation and training of a Generative Pre-trained
|
||||
Transformer (GPT) model in JAX, inspired by Andrej Karpathy’s JAX-based
|
||||
nanoGPT. Comparing how essential GPT components—such as self-attention
|
||||
mechanisms and optimizers—are realized in JAX and JAX, also highlights
|
||||
JAX’s unique features.
|
||||
|
||||
* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
|
||||
ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
|
||||
blog post provides a comprehensive guide on enhancing the training efficiency
|
||||
of GPT models by implementing mixed precision techniques in JAX, specifically
|
||||
tailored for AMD GPUs utilizing the ROCm platform.
|
||||
|
||||
* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
|
||||
blog demonstrates how to develop a custom fused dropout-activation kernel for
|
||||
matrices using Triton, integrate it with JAX, and benchmark its performance
|
||||
using ROCm.
|
||||
|
||||
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
|
||||
outlines the process of fine-tuning a Bidirectional Encoder Representations
|
||||
from Transformers (BERT)-based large language model (LLM) using JAX for a text
|
||||
classification task. The blog post discuss techniques for parallelizing the
|
||||
fine-tuning across multiple AMD GPUs and assess the model's performance on a
|
||||
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
|
||||
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
|
||||
used on a multi-GPU setup.
|
||||
|
||||
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. The page is aimed at helping users achieve optimal
|
||||
performance for deep learning and other high-performance computing tasks on
|
||||
the MI300X GPU.
|
||||
|
||||
For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
|
||||
|
||||
.. _jax-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories represent the latest JAX version from the official Docker Hub and are validated for
|
||||
`ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`_. Click the |docker-icon|
|
||||
icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: JAX Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.12/images/sha256-8918fa806a172c1a10eb2f57131eb31b5d7c8fa1656b8729fe7d3d736112de83"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 24.04
|
||||
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.10/images/sha256-a394be13c67b7fc602216abee51233afd4b6cb7adaa57ca97e688fba82f9ad79"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
|
||||
|
||||
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
|
||||
|
||||
.. list-table:: JAX community Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
.. _key_rocm_libraries:
|
||||
|
||||
Key ROCm libraries for JAX
|
||||
================================================================================
|
||||
|
||||
The following ROCm libraries represent potential targets that could be utilized
|
||||
by JAX on ROCm for various computational tasks. The actual libraries used will
|
||||
depend on the specific implementation and operations performed.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of hipBLAS, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
singular value decompositions (SVD).
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
||||
- :version-ref:`hipSPARSELt rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Optimized for deep learning primitives such as convolutions, pooling,
|
||||
normalization, and activation functions.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimized for multi-GPU communication for operations like all-reduce,
|
||||
broadcast, and scatter.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
|
||||
.. note::
|
||||
|
||||
This table shows ROCm libraries that could potentially be utilized by JAX. Not
|
||||
all libraries may be used in every configuration, and the actual library usage
|
||||
will depend on the specific operations and implementation details.
|
||||
|
||||
Supported data types and modules
|
||||
===============================================================================
|
||||
|
||||
The following tables lists the supported public JAX API data types and modules.
|
||||
|
||||
Supported data types
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
ROCm supports all the JAX data types of `jax.dtypes <https://docs.jax.dev/en/latest/jax.dtypes.html>`_
|
||||
module, `jax.numpy.dtype <https://docs.jax.dev/en/latest/_autosummary/jax.numpy.dtype.html>`_
|
||||
and `default_dtype <https://docs.jax.dev/en/latest/default_dtypes.html>`_ .
|
||||
The ROCm supported data types in JAX are collected in the following table.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
|
||||
* - ``bfloat16``
|
||||
- 16-bit bfloat (brain floating point).
|
||||
|
||||
* - ``bool``
|
||||
- Boolean.
|
||||
|
||||
* - ``complex128``
|
||||
- 128-bit complex.
|
||||
|
||||
* - ``complex64``
|
||||
- 64-bit complex.
|
||||
|
||||
* - ``float16``
|
||||
- 16-bit (half precision) floating-point.
|
||||
|
||||
* - ``float32``
|
||||
- 32-bit (single precision) floating-point.
|
||||
|
||||
* - ``float64``
|
||||
- 64-bit (double precision) floating-point.
|
||||
|
||||
* - ``half``
|
||||
- 16-bit (half precision) floating-point.
|
||||
|
||||
* - ``int16``
|
||||
- Signed 16-bit integer.
|
||||
|
||||
* - ``int32``
|
||||
- Signed 32-bit integer.
|
||||
|
||||
* - ``int64``
|
||||
- Signed 64-bit integer.
|
||||
|
||||
* - ``int8``
|
||||
- Signed 8-bit integer.
|
||||
|
||||
* - ``uint16``
|
||||
- Unsigned 16-bit (word) integer.
|
||||
|
||||
* - ``uint32``
|
||||
- Unsigned 32-bit (dword) integer.
|
||||
|
||||
* - ``uint64``
|
||||
- Unsigned 64-bit (qword) integer.
|
||||
|
||||
* - ``uint8``
|
||||
- Unsigned 8-bit (byte) integer.
|
||||
|
||||
.. note::
|
||||
|
||||
JAX data type support is effected by the :ref:`key_rocm_libraries` and it's
|
||||
collected on :doc:`ROCm data types and precision support <rocm:reference/precision-support>`
|
||||
page.
|
||||
|
||||
Supported modules
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
For a complete and up-to-date list of JAX public modules (for example, ``jax.numpy``,
|
||||
``jax.scipy``, ``jax.lax``), their descriptions, and usage, please refer directly to the
|
||||
`official JAX API documentation <https://jax.readthedocs.io/en/latest/jax.html>`_.
|
||||
|
||||
.. note::
|
||||
|
||||
Since version 0.1.56, JAX has full support for ROCm, and the
|
||||
:ref:`Known issues and important notes <jax_comp_known_issues>` section
|
||||
contains details about limitations specific to the ROCm backend. The list of
|
||||
JAX API modules is maintained by the JAX project and is subject to change.
|
||||
Refer to the official Jax documentation for the most up-to-date information.
|
||||
@@ -1,156 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: llama.cpp deep learning framework compatibility
|
||||
:keywords: GPU, GGML, llama.cpp compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
llama.cpp compatibility
|
||||
********************************************************************************
|
||||
|
||||
`llama.cpp <https://github.com/ggml-org/llama.cpp>`__ is an open-source framework
|
||||
for Large Language Model (LLM) inference that runs on both central processing units
|
||||
(CPUs) and graphics processing units (GPUs). It is written in plain C/C++, providing
|
||||
a simple, dependency-free setup.
|
||||
|
||||
The framework supports multiple quantization options, from 1.5-bit to 8-bit integers,
|
||||
to speed up inference and reduce memory usage. Originally built as a CPU-first library,
|
||||
llama.cpp is easy to integrate with other programming environments and is widely
|
||||
adopted across diverse platforms, including consumer devices.
|
||||
|
||||
ROCm support for llama.cpp is upstreamed, and you can build the official source code
|
||||
with ROCm support:
|
||||
|
||||
- ROCm support for llama.cpp is hosted in the official `https://github.com/ROCm/llama.cpp
|
||||
<https://github.com/ROCm/llama.cpp>`_ repository.
|
||||
|
||||
- Due to independent compatibility considerations, this location differs from the
|
||||
`https://github.com/ggml-org/llama.cpp <https://github.com/ggml-org/llama.cpp>`_ upstream repository.
|
||||
|
||||
- To install llama.cpp, use the prebuilt :ref:`Docker image <llama-cpp-docker-compat>`,
|
||||
which includes ROCm, llama.cpp, and all required dependencies.
|
||||
|
||||
- See the :doc:`ROCm llama.cpp installation guide <rocm-install-on-linux:install/3rd-party/llama-cpp-install>`
|
||||
to install and get started.
|
||||
|
||||
- See the `Installation guide <https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md#hip>`__
|
||||
in the upstream llama.cpp documentation.
|
||||
|
||||
.. note::
|
||||
|
||||
llama.cpp is supported on ROCm 6.4.0.
|
||||
|
||||
Supported devices
|
||||
================================================================================
|
||||
|
||||
**Officially Supported**: AMD Instinct™ MI300X, MI210
|
||||
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
llama.cpp can be applied in a variety of scenarios, particularly when you need to meet one or more of the following requirements:
|
||||
|
||||
- Plain C/C++ implementation with no external dependencies
|
||||
- Support for 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory usage
|
||||
- Custom HIP (Heterogeneous-compute Interface for Portability) kernels for running large language models (LLMs) on AMD GPUs (graphics processing units)
|
||||
- CPU (central processing unit) + GPU (graphics processing unit) hybrid inference for partially accelerating models larger than the total available VRAM (video random-access memory)
|
||||
|
||||
llama.cpp is also used in a range of real-world applications, including:
|
||||
|
||||
- Games such as `Lucy's Labyrinth <https://github.com/MorganRO8/Lucys_Labyrinth>`__:
|
||||
A simple maze game where AI-controlled agents attempt to trick the player.
|
||||
- Tools such as `Styled Lines <https://marketplace.unity.com/packages/tools/ai-ml-integration/style-text-webgl-ios-stand-alone-llm-llama-cpp-wrapper-292902>`__:
|
||||
A proprietary, asynchronous inference wrapper for Unity3D game development, including pre-built mobile and web platform wrappers and a model example.
|
||||
- Various other AI applications use llama.cpp as their inference engine;
|
||||
for a detailed list, see the `user interfaces (UIs) section <https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#description>`__.
|
||||
|
||||
For more use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||
where you can search for llama.cpp examples and best practices to optimize your workloads on AMD GPUs.
|
||||
|
||||
- The `Llama.cpp Meets Instinct: A New Era of Open-Source AI Acceleration <https://rocm.blogs.amd.com/ecosystems-and-partners/llama-cpp/README.html>`__,
|
||||
blog post outlines how the open-source llama.cpp framework enables efficient LLM inference—including interactive inference with ``llama-cli``,
|
||||
server deployment with ``llama-server``, GGUF model preparation and quantization, performance benchmarking, and optimizations tailored for
|
||||
AMD Instinct GPUs within the ROCm ecosystem.
|
||||
|
||||
.. _llama-cpp-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `ROCm llama.cpp Docker images <https://hub.docker.com/r/rocm/llama.cpp>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. important::
|
||||
|
||||
Tag endings of ``_full``, ``_server``, and ``_light`` serve different purposes for entrypoints as follows:
|
||||
|
||||
- Full: This image includes both the main executable file and the tools to convert ``LLaMA`` models into ``ggml`` and convert into 4-bit quantization.
|
||||
- Server: This image only includes the server executable file.
|
||||
- Light: This image only includes the main executable file.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Full Docker
|
||||
- Server Docker
|
||||
- Light Docker
|
||||
- llama.cpp
|
||||
- Ubuntu
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_full/images/sha256-f78f6c81ab2f8e957469415fe2370a1334fe969c381d1fe46050c85effaee9d5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_server/images/sha256-275ad9e18f292c26a00a2de840c37917e98737a88a3520bdc35fd3fc5c9a6a9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_light/images/sha256-cc324e6faeedf0e400011f07b49d2dc41a16bae257b2b7befa0f4e2e97231320"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b5997 <https://github.com/ROCm/llama.cpp/tree/release/b5997>`__
|
||||
- 24.04
|
||||
|
||||
Key ROCm libraries for llama.cpp
|
||||
================================================================================
|
||||
|
||||
llama.cpp functionality on ROCm is determined by its underlying library
|
||||
dependencies. These ROCm components affect the capabilities, performance, and
|
||||
feature set available to developers.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
- Usage
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Supports operations such as matrix multiplication, matrix-vector
|
||||
products, and tensor contractions. Utilized in both dense and batched
|
||||
linear algebra operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- By setting the flag ``ROCBLAS_USE_HIPBLASLT``, you can dispatch hipblasLt
|
||||
kernels where possible.
|
||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
|
||||
- :version-ref:`rocWMMA rocm_version`
|
||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||
multiplication (GEMM) and accumulation operations with mixed precision
|
||||
support.
|
||||
- Can be used to enhance the flash attention performance on AMD compute, by enabling
|
||||
the flag during compile time.
|
||||
@@ -1,93 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Megablocks compatibility
|
||||
:keywords: GPU, megablocks, compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
Megablocks compatibility
|
||||
********************************************************************************
|
||||
|
||||
Megablocks is a light-weight library for mixture-of-experts (MoE) training.
|
||||
The core of the system is efficient "dropless-MoE" and standard MoE layers.
|
||||
Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_,
|
||||
where data and pipeline parallel training of MoEs is supported.
|
||||
|
||||
* ROCm support for Megablocks is hosted in the official `https://github.com/ROCm/megablocks <https://github.com/ROCm/megablocks>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker image <megablocks-docker-compat>` with ROCm, PyTorch, and Megablocks preinstalled.
|
||||
* See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` to install and get started.
|
||||
|
||||
.. note::
|
||||
|
||||
Megablocks is supported on ROCm 6.3.0.
|
||||
|
||||
Supported devices
|
||||
================================================================================
|
||||
|
||||
- **Officially Supported**: AMD Instinct MI300X
|
||||
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
|
||||
|
||||
Supported models and features
|
||||
================================================================================
|
||||
|
||||
This section summarizes the Megablocks features supported by ROCm.
|
||||
|
||||
* Distributed Pre-training
|
||||
* Activation Checkpointing and Recomputation
|
||||
* Distributed Optimizer
|
||||
* Mixture-of-Experts
|
||||
* dropless-Mixture-of-Experts
|
||||
|
||||
|
||||
.. _megablocks-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
The `ROCm Megablocks blog posts <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_
|
||||
guide how to leverage the ROCm platform for pre-training using the Megablocks framework.
|
||||
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
|
||||
|
||||
* Single-GPU pre-training
|
||||
* Multi-GPU pre-training
|
||||
|
||||
|
||||
.. _megablocks-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `ROCm Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest Megatron-LM version from the official Docker Hub.
|
||||
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- Megablocks
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/megablocks/megablocks-0.7.0_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-372ff89b96599019b8f5f9db469c84add2529b713456781fa62eb9a148659ab4"><i class="fab fa-docker fa-lg"></i> rocm/megablocks</a>
|
||||
- `6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_
|
||||
- `0.7.0 <https://github.com/databricks/megablocks/releases/tag/v0.7.0>`_
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
|
||||
|
||||
@@ -1,535 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: PyTorch compatibility
|
||||
:keywords: GPU, PyTorch compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
PyTorch compatibility
|
||||
********************************************************************************
|
||||
|
||||
`PyTorch <https://pytorch.org/>`__ is an open-source tensor library designed for
|
||||
deep learning. PyTorch on ROCm provides mixed-precision and large-scale training
|
||||
using `MIOpen <https://github.com/ROCm/MIOpen>`__ and
|
||||
`RCCL <https://github.com/ROCm/rccl>`__ libraries.
|
||||
|
||||
ROCm support for PyTorch is upstreamed into the official PyTorch repository. Due
|
||||
to independent compatibility considerations, this results in two distinct
|
||||
release cycles for PyTorch on ROCm:
|
||||
|
||||
- ROCm PyTorch release:
|
||||
|
||||
- Provides the latest version of ROCm but might not necessarily support the
|
||||
latest stable PyTorch version.
|
||||
|
||||
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
|
||||
preinstalled.
|
||||
|
||||
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`__
|
||||
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
to get started.
|
||||
|
||||
- Official PyTorch release:
|
||||
|
||||
- Provides the latest stable version of PyTorch but might not necessarily
|
||||
support the latest ROCm version.
|
||||
|
||||
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`__
|
||||
|
||||
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`__
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`__
|
||||
to get started.
|
||||
|
||||
PyTorch includes tooling that generates HIP source code from the CUDA backend.
|
||||
This approach allows PyTorch to support ROCm without requiring manual code
|
||||
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
|
||||
|
||||
ROCm development is aligned with the stable release of PyTorch, while upstream
|
||||
PyTorch testing uses the stable release of ROCm to maintain consistency.
|
||||
|
||||
.. _pytorch-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
|
||||
guides how to leverage the ROCm platform for training AI models. It covers the
|
||||
steps, tools, and best practices for optimizing training workflows on AMD GPUs
|
||||
using PyTorch features.
|
||||
|
||||
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates how to use the ROCm platform for the fine-tuning
|
||||
and inference of machine learning models, particularly large language models
|
||||
(LLMs), on systems with a single GPU. This topic provides a detailed guide for
|
||||
setting up, optimizing, and executing fine-tuning and inference workflows in
|
||||
such environments.
|
||||
|
||||
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates the fine-tuning and inference of machine learning
|
||||
models on systems with multiple GPUs.
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. This guide helps users achieve optimal performance for
|
||||
deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the
|
||||
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
|
||||
GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`__.
|
||||
|
||||
.. _pytorch-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: PyTorch Docker image components
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- Apex
|
||||
- torchvision
|
||||
- TensorBoard
|
||||
- MAGMA
|
||||
- UCX
|
||||
- OMPI
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-6a287591500b4048a9556c1ecc92bc411fd3d552f6c8233bc399f18eb803e8d6"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-06b967629ba6657709f04169832cd769a11e6b491e8b1394c361d42d7a0c8b43"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-62022414217ef6de33ac5b1341e57db8a48e8573fa2ace12d48aa5edd4b99ef0"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.10.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-469a7f74fc149aff31797e011ee41978f6a190adc69fa423b3c6a718a77bd985"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- 22.04
|
||||
- `3.11 <https://www.python.org/downloads/release/python-31113/>`__
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-37f41a1cd94019688669a1b20d33ea74156e0c129ef6b8270076ef214a6a1a2c"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-60824ba83dc1b9d94164925af1f81c0235c105dd555091ec04c57e05177ead1b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-fe944fe083312f901be6891ab4d3ffebf2eaf2cf4f5f0f435ef0b76ec714fabd"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-1d59251c47170c5b8960d1172a4dbe52f5793d8966edd778f168eaf32d56661a"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`__
|
||||
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`__
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
Key ROCm libraries for PyTorch
|
||||
================================================================================
|
||||
|
||||
PyTorch functionality on ROCm is determined by its underlying library
|
||||
dependencies. These ROCm components affect the capabilities, performance, and
|
||||
feature set available to developers.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
- Used in
|
||||
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`__
|
||||
- :version-ref:`"Composable Kernel" rocm_version`
|
||||
- Enables faster execution of core operations like matrix multiplication
|
||||
(GEMM), convolutions and transformations.
|
||||
- Speeds up ``torch.permute``, ``torch.view``, ``torch.matmul``,
|
||||
``torch.mm``, ``torch.bmm``, ``torch.nn.Conv2d``, ``torch.nn.Conv3d``
|
||||
and ``torch.nn.MultiheadAttention``.
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Supports operations such as matrix multiplication, matrix-vector
|
||||
products, and tensor contractions. Utilized in both dense and batched
|
||||
linear algebra operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
|
||||
matrix multiplications used in convolutional and linear layers.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`__
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Supports operations such as ``torch.sum``, ``torch.cumsum``,
|
||||
``torch.sort`` irregular shapes often involve scanning, sorting, and
|
||||
filtering, which hipCUB handles efficiently.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`__
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
- Used in functions like the ``torch.fft`` module.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`__
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
- The ``torch.rand``, ``torch.randn``, and stochastic layers like
|
||||
``torch.nn.Dropout`` rely on hipRAND.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`__
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
singular value decompositions (SVD).
|
||||
- Supports functions like ``torch.linalg.solve``,
|
||||
``torch.linalg.eig``, and ``torch.linalg.svd``.
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`__
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
- Sparse tensor operations ``torch.sparse``.
|
||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`__
|
||||
- :version-ref:`hipSPARSELt rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
- Sparse tensor operations ``torch.sparse``.
|
||||
* - `hipTensor <https://github.com/ROCm/hipTensor>`__
|
||||
- :version-ref:`hipTensor rocm_version`
|
||||
- Optimizes for high-performance tensor operations, such as contractions.
|
||||
- Accelerates tensor algebra, especially in deep learning and scientific
|
||||
computing.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`__
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Optimizes deep learning primitives such as convolutions, pooling,
|
||||
normalization, and activation functions.
|
||||
- Speeds up convolutional neural networks (CNNs), recurrent neural
|
||||
networks (RNNs), and other layers. Used in operations like
|
||||
``torch.nn.Conv2d``, ``torch.nn.ReLU``, and ``torch.nn.LSTM``.
|
||||
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`__
|
||||
- :version-ref:`MIGraphX rocm_version`
|
||||
- Adds graph-level optimizations, ONNX models and mixed precision support
|
||||
and enable Ahead-of-Time (AOT) Compilation.
|
||||
- Speeds up inference models and executes ONNX models for
|
||||
compatibility with other frameworks.
|
||||
``torch.nn.Conv2d``, ``torch.nn.ReLU``, and ``torch.nn.LSTM``.
|
||||
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`__
|
||||
- :version-ref:`MIVisionX rocm_version`
|
||||
- Optimizes acceleration for computer vision and AI workloads like
|
||||
preprocessing, augmentation, and inferencing.
|
||||
- Faster data preprocessing and augmentation pipelines for datasets like
|
||||
ImageNet or COCO and easy to integrate into PyTorch's ``torch.utils.data``
|
||||
and ``torchvision`` workflows.
|
||||
* - `rocAL <https://github.com/ROCm/rocAL>`__
|
||||
- :version-ref:`rocAL rocm_version`
|
||||
- Accelerates the data pipeline by offloading intensive preprocessing and
|
||||
augmentation tasks. rocAL is part of MIVisionX.
|
||||
- Easy to integrate into PyTorch's ``torch.utils.data`` and
|
||||
``torchvision`` data load workloads.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`__
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||
Broadcast.
|
||||
- Distributed data parallel training (``torch.nn.parallel.DistributedDataParallel``).
|
||||
Handles communication in multi-GPU setups.
|
||||
* - `rocDecode <https://github.com/ROCm/rocDecode>`__
|
||||
- :version-ref:`rocDecode rocm_version`
|
||||
- Provides hardware-accelerated data decoding capabilities, particularly
|
||||
for image, video, and other dataset formats.
|
||||
- Can be integrated in ``torch.utils.data``, ``torchvision.transforms``
|
||||
and ``torch.distributed``.
|
||||
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`__
|
||||
- :version-ref:`rocJPEG rocm_version`
|
||||
- Provides hardware-accelerated JPEG image decoding and encoding.
|
||||
- GPU accelerated ``torchvision.io.decode_jpeg`` and
|
||||
``torchvision.io.encode_jpeg`` and can be integrated in
|
||||
``torch.utils.data`` and ``torchvision``.
|
||||
* - `RPP <https://github.com/ROCm/RPP>`__
|
||||
- :version-ref:`RPP rocm_version`
|
||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||
- Easy to integrate into PyTorch's ``torch.utils.data`` and
|
||||
``torchvision`` data load workloads to speed up data processing.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`__
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
- Utilized in backend operations for tensor computations requiring
|
||||
parallel processing.
|
||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
|
||||
- :version-ref:`rocWMMA rocm_version`
|
||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||
multiplication (GEMM) and accumulation operations with mixed precision
|
||||
support.
|
||||
- Linear layers (``torch.nn.Linear``), convolutional layers
|
||||
(``torch.nn.Conv2d``), attention layers, general tensor operations that
|
||||
involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
|
||||
more.
|
||||
|
||||
Supported modules and data types
|
||||
================================================================================
|
||||
|
||||
The following section outlines the supported data types, modules, and domain libraries available in PyTorch on ROCm.
|
||||
|
||||
Supported data types
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The tensor data type is specified using the ``dtype`` attribute or argument.
|
||||
PyTorch supports many data types for different use cases.
|
||||
|
||||
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`__
|
||||
single data types:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
* - ``torch.float8_e4m3fn``
|
||||
- 8-bit floating point, e4m3
|
||||
* - ``torch.float8_e5m2``
|
||||
- 8-bit floating point, e5m2
|
||||
* - ``torch.float16`` or ``torch.half``
|
||||
- 16-bit floating point
|
||||
* - ``torch.bfloat16``
|
||||
- 16-bit floating point
|
||||
* - ``torch.float32`` or ``torch.float``
|
||||
- 32-bit floating point
|
||||
* - ``torch.float64`` or ``torch.double``
|
||||
- 64-bit floating point
|
||||
* - ``torch.complex32`` or ``torch.chalf``
|
||||
- 32-bit complex numbers
|
||||
* - ``torch.complex64`` or ``torch.cfloat``
|
||||
- 64-bit complex numbers
|
||||
* - ``torch.complex128`` or ``torch.cdouble``
|
||||
- 128-bit complex numbers
|
||||
* - ``torch.uint8``
|
||||
- 8-bit integer (unsigned)
|
||||
* - ``torch.uint16``
|
||||
- 16-bit integer (unsigned);
|
||||
Not natively supported in ROCm
|
||||
* - ``torch.uint32``
|
||||
- 32-bit integer (unsigned);
|
||||
Not natively supported in ROCm
|
||||
* - ``torch.uint64``
|
||||
- 64-bit integer (unsigned);
|
||||
Not natively supported in ROCm
|
||||
* - ``torch.int8``
|
||||
- 8-bit integer (signed)
|
||||
* - ``torch.int16`` or ``torch.short``
|
||||
- 16-bit integer (signed)
|
||||
* - ``torch.int32`` or ``torch.int``
|
||||
- 32-bit integer (signed)
|
||||
* - ``torch.int64`` or ``torch.long``
|
||||
- 64-bit integer (signed)
|
||||
* - ``torch.bool``
|
||||
- Boolean
|
||||
* - ``torch.quint8``
|
||||
- Quantized 8-bit integer (unsigned)
|
||||
* - ``torch.qint8``
|
||||
- Quantized 8-bit integer (signed)
|
||||
* - ``torch.qint32``
|
||||
- Quantized 32-bit integer (signed)
|
||||
* - ``torch.quint4x2``
|
||||
- Quantized 4-bit integer (unsigned)
|
||||
|
||||
.. note::
|
||||
|
||||
Unsigned types, except ``uint8``, have limited support in eager mode. They
|
||||
primarily exist to assist usage with ``torch.compile``.
|
||||
|
||||
See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
|
||||
native hardware support of data types.
|
||||
|
||||
Supported modules
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
For a complete and up-to-date list of PyTorch core modules (for example., ``torch``,
|
||||
``torch.nn``, ``torch.cuda``, ``torch.backends.cuda`` and
|
||||
``torch.backends.cudnn``), their descriptions, and usage, please refer directly
|
||||
to the `official PyTorch documentation <https://pytorch.org/docs/stable/index.html>`_.
|
||||
|
||||
Core PyTorch functionality on ROCm includes tensor operations, neural network
|
||||
layers, automatic differentiation, distributed training, mixed-precision
|
||||
training, compilation features, and domain-specific libraries for audio, vision,
|
||||
text processing, and more.
|
||||
|
||||
Supported domain libraries
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
PyTorch offers specialized `domain libraries <https://pytorch.org/domains/>`_ with
|
||||
GPU acceleration that build on its core features to support specific application
|
||||
areas. The table below lists the PyTorch domain libraries that are compatible
|
||||
with ROCm.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Library
|
||||
- Description
|
||||
|
||||
* - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
|
||||
- Audio and signal processing library for PyTorch. Provides utilities for
|
||||
audio I/O, signal and data processing functions, datasets, model
|
||||
implementations, and application components for audio and speech
|
||||
processing tasks.
|
||||
|
||||
**Note:** To ensure GPU-acceleration with ``torchaudio.transforms``,
|
||||
you need to explicitly move audio data (waveform tensor) to GPU using
|
||||
``.to('cuda')``.
|
||||
|
||||
* - `torchtune <https://docs.pytorch.org/torchtune/stable/index.html>`_
|
||||
- PyTorch-native library designed for fine-tuning large language models
|
||||
(LLMs). Provides supports the full fine-tuning workflow and offers
|
||||
compatibility with popular production inference systems.
|
||||
|
||||
**Note:** Only official release exists.
|
||||
|
||||
* - `torchvision <https://docs.pytorch.org/vision/stable/index.html>`_
|
||||
- Computer vision library that is part of the PyTorch project. Provides
|
||||
popular datasets, model architectures, and common image transformations
|
||||
for computer vision applications.
|
||||
|
||||
* - `torchtext <https://docs.pytorch.org/text/stable/index.html>`_
|
||||
- Text processing library for PyTorch. Provides data processing utilities
|
||||
and popular datasets for natural language processing, including
|
||||
tokenization, vocabulary management, and text embeddings.
|
||||
|
||||
**Note:** ``torchtext`` does not implement ROCm-specific kernels.
|
||||
ROCm acceleration is provided through the underlying PyTorch framework
|
||||
and ROCm library integration. Only official release exists.
|
||||
|
||||
* - `torchdata <https://docs.pytorch.org/data/beta/index.html>`_
|
||||
- Beta library of common modular data loading primitives for easily
|
||||
constructing flexible and performant data pipelines, with features still
|
||||
in prototype stage.
|
||||
|
||||
* - `torchrec <https://docs.pytorch.org/torchrec/>`_
|
||||
- PyTorch domain library for common sparsity and parallelism primitives
|
||||
needed for large-scale recommender systems, enabling authors to train
|
||||
models with large embedding tables shared across many GPUs.
|
||||
|
||||
**Note:** ``torchrec`` does not implement ROCm-specific kernels. ROCm
|
||||
acceleration is provided through the underlying PyTorch framework and
|
||||
ROCm library integration.
|
||||
|
||||
* - `torchserve <https://docs.pytorch.org/serve/>`_
|
||||
- Performant, flexible and easy-to-use tool for serving PyTorch models in
|
||||
production, providing features for model management, batch processing,
|
||||
and scalable deployment.
|
||||
|
||||
**Note:** `torchserve <https://docs.pytorch.org/serve/>`_ is no longer
|
||||
actively maintained. Last official release is sent out with PyTorch 2.4.
|
||||
|
||||
* - `torchrl <https://docs.pytorch.org/rl/stable/index.html>`_
|
||||
- Open-source, Python-first Reinforcement Learning library for PyTorch
|
||||
with a focus on high modularity and good runtime performance, providing
|
||||
low and high-level RL abstractions and reusable functionals for cost
|
||||
functions, returns, and data processing.
|
||||
|
||||
**Note:** Only official release exists.
|
||||
|
||||
* - `tensordict <https://docs.pytorch.org/tensordict/stable/index.html>`_
|
||||
- Dictionary-like class that simplifies operations on batches of tensors,
|
||||
enhancing code readability, compactness, and modularity by abstracting
|
||||
tailored operations and reducing errors through automatic operation
|
||||
dispatching.
|
||||
|
||||
**Note:** Only official release exists.
|
||||
@@ -1,111 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Ray deep learning framework compatibility
|
||||
:keywords: GPU, Ray compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
Ray compatibility
|
||||
*******************************************************************************
|
||||
|
||||
Ray is a unified framework for scaling AI and Python applications from your laptop
|
||||
to a full cluster, without changing your code. Ray consists of `a core distributed
|
||||
runtime <https://docs.ray.io/en/latest/ray-core/walkthrough.html>`_ and a set of
|
||||
`AI libraries <https://docs.ray.io/en/latest/ray-air/getting-started.html>`_ for
|
||||
simplifying machine learning computations.
|
||||
|
||||
Ray is a general-purpose framework that runs many types of workloads efficiently.
|
||||
Any Python application can be scaled with Ray, without extra infrastructure.
|
||||
|
||||
ROCm support for Ray is upstreamed, and you can build the official source code
|
||||
with ROCm support:
|
||||
|
||||
- ROCm support for Ray is hosted in the official `https://github.com/ROCm/ray
|
||||
<https://github.com/ROCm/ray>`_ repository.
|
||||
|
||||
- Due to independent compatibility considerations, this location differs from the
|
||||
`https://github.com/ray-project/ray <https://github.com/ray-project/ray>`_ upstream repository.
|
||||
|
||||
- To install Ray, use the prebuilt :ref:`Docker image <ray-docker-compat>`
|
||||
which includes ROCm, Ray, and all required dependencies.
|
||||
|
||||
- See the :doc:`ROCm Ray installation guide <rocm-install-on-linux:install/3rd-party/ray-install>`
|
||||
for instructions to get started.
|
||||
|
||||
- See the `Installation section <https://docs.ray.io/en/latest/ray-overview/installation.html>`_
|
||||
in the upstream Ray documentation.
|
||||
|
||||
- The Docker image provided is based on the upstream Ray `Daily Release (Nightly) wheels <https://docs.ray.io/en/latest/ray-overview/installation.html#daily-releases-nightlies>`__
|
||||
corresponding to commit `005c372 <https://github.com/ray-project/ray/commit/005c372262e050d5745f475e22e64305fa07f8b8>`__.
|
||||
|
||||
.. note::
|
||||
|
||||
Ray is supported on ROCm 6.4.1.
|
||||
|
||||
Supported devices
|
||||
================================================================================
|
||||
|
||||
**Officially Supported**: AMD Instinct™ MI300X, MI210
|
||||
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* The `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm
|
||||
Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
|
||||
blog provides an overview of Volcano Engine Reinforcement Learning (verl)
|
||||
for large language models (LLMs) and discusses its benefits in large-scale
|
||||
reinforcement learning from human feedback (RLHF). It uses Ray as part of a
|
||||
hybrid orchestration engine to schedule and coordinate training and inference
|
||||
tasks in parallel, enabling optimized resource utilization and potential overlap
|
||||
between these phases. This dynamic resource allocation strategy significantly
|
||||
improves overall system efficiency. The blog presents verl’s performance results,
|
||||
focusing on throughput and convergence accuracy achieved on AMD Instinct™ MI300X
|
||||
GPUs. Follow this guide to get started with verl on AMD Instinct GPUs and
|
||||
accelerate your RLHF training with ROCm-optimized performance.
|
||||
|
||||
* The `Exploring Use Cases for Scalable AI: Implementing Ray with ROCm Support for Efficient ML Workflows
|
||||
<https://rocm.blogs.amd.com/artificial-intelligence/rocm-ray/README.html>`__
|
||||
blog post describes key use cases such as training and inference for large language models (LLMs),
|
||||
model serving, hyperparameter tuning, reinforcement learning, and the orchestration of large-scale
|
||||
workloads using Ray in the ROCm environment.
|
||||
|
||||
For more use cases and recommendations, see the AMD GPU tabs in the `Accelerator Support
|
||||
topic <https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html#accelerator-support>`__
|
||||
of the Ray core documentation and refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||
where you can search for Ray examples and best practices to optimize your workloads on AMD GPUs.
|
||||
|
||||
.. _ray-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm Ray Docker images <https://hub.docker.com/r/rocm/ray/tags>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories represent the latest Ray version from the official Docker Hub and are validated for
|
||||
`ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_. Click the |docker-icon|
|
||||
icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- Ray
|
||||
- Pytorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.48.0.post0_rocm6.4.1_ubuntu24.04_py3.12_pytorch2.6.0/images/sha256-0d166fe6bdced38338c78eedfb96eff92655fb797da3478a62dd636365133cc0"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
|
||||
- `2.48.0.post0 <https://github.com/ROCm/ray/tree/release/2.48.0.post0>`_
|
||||
- 2.6.0+git684f6f2
|
||||
- 24.04
|
||||
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
|
||||
@@ -1,100 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Stanford Megatron-LM compatibility
|
||||
:keywords: Stanford, Megatron-LM, compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
Stanford Megatron-LM compatibility
|
||||
********************************************************************************
|
||||
|
||||
Stanford Megatron-LM is a large-scale language model training framework developed by NVIDIA `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_. It is
|
||||
designed to train massive transformer-based language models efficiently by model and data parallelism.
|
||||
|
||||
* ROCm support for Stanford Megatron-LM is hosted in the official `https://github.com/ROCm/Stanford-Megatron-LM <https://github.com/ROCm/Stanford-Megatron-LM>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>` with ROCm, PyTorch, and Megatron-LM preinstalled.
|
||||
* See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` to install and get started.
|
||||
|
||||
.. note::
|
||||
|
||||
Stanford Megatron-LM is supported on ROCm 6.3.0.
|
||||
|
||||
|
||||
Supported Devices
|
||||
================================================================================
|
||||
|
||||
- **Officially Supported**: AMD Instinct MI300X
|
||||
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
|
||||
|
||||
|
||||
Supported models and features
|
||||
================================================================================
|
||||
|
||||
This section details models & features that are supported by the ROCm version on Stanford Megatron-LM.
|
||||
|
||||
Models:
|
||||
|
||||
* Bert
|
||||
* GPT
|
||||
* T5
|
||||
* ICT
|
||||
|
||||
Features:
|
||||
|
||||
* Distributed Pre-training
|
||||
* Activation Checkpointing and Recomputation
|
||||
* Distributed Optimizer
|
||||
* Mixture-of-Experts
|
||||
|
||||
.. _megatron-lm-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
See the `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs blog <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_ post
|
||||
to leverage the ROCm platform for pre-training by using the Stanford Megatron-LM framework of pre-processing datasets on AMD GPUs.
|
||||
Coverage includes:
|
||||
|
||||
* Single-GPU pre-training
|
||||
* Multi-GPU pre-training
|
||||
|
||||
|
||||
.. _megatron-lm-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/megatron-lm>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest Megatron-LM version from the official Docker Hub.
|
||||
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- Stanford Megatron-LM
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
|
||||
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Taichi compatibility
|
||||
:keywords: GPU, Taichi compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
Taichi compatibility
|
||||
*******************************************************************************
|
||||
|
||||
`Taichi <https://www.taichi-lang.org/>`_ is an open-source, imperative, and parallel
|
||||
programming language designed for high-performance numerical computation.
|
||||
Embedded in Python, it leverages just-in-time (JIT) compilation frameworks such as LLVM to accelerate
|
||||
compute-intensive Python code by compiling it to native GPU or CPU instructions.
|
||||
|
||||
Taichi is widely used across various domains, including real-time physical simulation,
|
||||
numerical computing, augmented reality, artificial intelligence, computer vision, robotics,
|
||||
visual effects in film and gaming, and general-purpose computing.
|
||||
|
||||
* ROCm support for Taichi is hosted in the official `https://github.com/ROCm/taichi <https://github.com/ROCm/taichi>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/taichi-dev <https://github.com/taichi-dev>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker image <taichi-docker-compat>` with ROCm, PyTorch, and Taichi preinstalled.
|
||||
* See the :doc:`ROCm Taichi installation guide <rocm-install-on-linux:install/3rd-party/taichi-install>` to install and get started.
|
||||
|
||||
.. note::
|
||||
|
||||
Taichi is supported on ROCm 6.3.2.
|
||||
|
||||
Supported devices and features
|
||||
===============================================================================
|
||||
There is support through the ROCm software stack for all Taichi GPU features on AMD Instinct MI250X and MI210X series GPUs with the exception of Taichi’s GPU rendering system, CGUI.
|
||||
AMD Instinct MI300X series GPUs will be supported by November.
|
||||
|
||||
.. _taichi-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
To fully leverage Taichi's performance capabilities in compute-intensive tasks, it is best to adhere to specific coding patterns and utilize Taichi decorators.
|
||||
A collection of example use cases is available in the `https://github.com/ROCm/taichi_examples <https://github.com/ROCm/taichi_examples>`_ repository,
|
||||
providing practical insights and foundational knowledge for working with the Taichi programming language.
|
||||
You can also refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_ to search for Taichi examples and best practices to optimize your workflows on AMD GPUs.
|
||||
|
||||
.. _taichi-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm Taichi Docker images <https://hub.docker.com/r/rocm/taichi/tags>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
|
||||
represent the latest Taichi version from the official Docker Hub.
|
||||
The Docker images have been validated for `ROCm 6.3.2 <https://rocm.docs.amd.com/en/docs-6.3.2/about/release-notes.html>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- Taichi
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/taichi/taichi-1.8.0b1_rocm6.3.2_ubuntu22.04_py3.10.12/images/sha256-e016964a751e6a92199032d23e70fa3a564fff8555afe85cd718f8aa63f11fc6"><i class="fab fa-docker fa-lg"></i> rocm/taichi</a>
|
||||
- `6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_
|
||||
- `1.8.0b1 <https://github.com/taichi-dev/taichi>`_
|
||||
- 22.04
|
||||
- `3.10.12 <https://www.python.org/downloads/release/python-31012/>`_
|
||||
@@ -1,504 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: TensorFlow compatibility
|
||||
:keywords: GPU, TensorFlow compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
TensorFlow compatibility
|
||||
*******************************************************************************
|
||||
|
||||
`TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for
|
||||
solving machine learning, deep learning, and AI problems. It can solve many
|
||||
problems across different sectors and industries but primarily focuses on
|
||||
neural network training and inference. It is one of the most popular and
|
||||
in-demand frameworks and is very active in open-source contribution and
|
||||
development.
|
||||
|
||||
The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__
|
||||
includes full ROCm support. AMD maintains a TensorFlow `ROCm repository
|
||||
<http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug
|
||||
fixes, updates, and support for the latest ROCM versions.
|
||||
|
||||
- ROCm TensorFlow release:
|
||||
|
||||
- Offers :ref:`Docker images <tensorflow-docker-compat>` with
|
||||
ROCm and TensorFlow pre-installed.
|
||||
|
||||
- ROCm TensorFlow repository: `<https://github.com/ROCm/tensorflow-upstream>`__
|
||||
|
||||
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
|
||||
to get started.
|
||||
|
||||
- Official TensorFlow release:
|
||||
|
||||
- Official TensorFlow repository: `<https://github.com/tensorflow/tensorflow>`__
|
||||
|
||||
- See the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list.
|
||||
|
||||
.. note::
|
||||
|
||||
The official TensorFlow documentation does not cover ROCm support. Use the
|
||||
ROCm documentation for installation instructions for Tensorflow on ROCm.
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/tensorflow-install`.
|
||||
|
||||
.. _tensorflow-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
===============================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `TensorFlow images
|
||||
<https://hub.docker.com/r/rocm/tensorflow>`__ with ROCm backends on
|
||||
Docker Hub. The following Docker image tags and associated inventories are
|
||||
validated for `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__. Click
|
||||
the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: TensorFlow Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- TensorFlow
|
||||
- Ubuntu
|
||||
- Python
|
||||
- TensorBoard
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
|
||||
|
||||
Critical ROCm libraries for TensorFlow
|
||||
===============================================================================
|
||||
|
||||
TensorFlow depends on multiple components and the supported features of those
|
||||
components can affect the TensorFlow ROCm supported feature set. The versions
|
||||
in the following table refer to the first TensorFlow version where the ROCm
|
||||
library was introduced as a dependency. The versions described
|
||||
are available in ROCm :version:`rocm_version`.
|
||||
|
||||
.. list-table::
|
||||
:widths: 25, 10, 35, 30
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
- Used in
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Accelerates operations like ``tf.matmul``, ``tf.linalg.matmul``, and
|
||||
other matrix multiplications commonly used in neural network layers.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- Extends hipBLAS with additional optimizations like fused kernels and
|
||||
integer tensor cores.
|
||||
- Optimizes matrix multiplications and linear algebra operations used in
|
||||
layers like dense, convolutional, and RNNs in TensorFlow.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`__
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Supports operations like ``tf.reduce_sum``, ``tf.cumsum``, ``tf.sort``
|
||||
and other tensor operations in TensorFlow, especially those involving
|
||||
scanning, sorting, and filtering.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`__
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Accelerates Fast Fourier Transforms (FFT) for signal processing tasks.
|
||||
- Used for operations like signal processing, image filtering, and
|
||||
certain types of neural networks requiring FFT-based transformations.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`__
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated direct linear solvers for dense and sparse
|
||||
systems.
|
||||
- Optimizes linear algebra functions such as solving systems of linear
|
||||
equations, often used in optimization and training tasks.
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`__
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Optimizes sparse matrix operations for efficient computations on sparse
|
||||
data.
|
||||
- Accelerates sparse matrix operations in models with sparse weight
|
||||
matrices or activations, commonly used in neural networks.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`__
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Provides optimized deep learning primitives such as convolutions,
|
||||
pooling,
|
||||
normalization, and activation functions.
|
||||
- Speeds up convolutional neural networks (CNNs) and other layers. Used
|
||||
in TensorFlow for layers like ``tf.nn.conv2d``, ``tf.nn.relu``, and
|
||||
``tf.nn.lstm_cell``.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`__
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||
Broadcast.
|
||||
- Distributed data parallel training (``tf.distribute.MirroredStrategy``).
|
||||
Handles communication in multi-GPU setups.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`__
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
- Reduction operations like ``tf.reduce_sum``, ``tf.cumsum`` for computing
|
||||
the cumulative sum of elements along a given axis or ``tf.unique`` to
|
||||
finds unique elements in a tensor can use rocThrust.
|
||||
|
||||
Supported and unsupported features
|
||||
===============================================================================
|
||||
|
||||
The following section maps supported data types and GPU-accelerated TensorFlow
|
||||
features to their minimum supported ROCm and TensorFlow versions.
|
||||
|
||||
Data types
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The data type of a tensor is specified using the ``dtype`` attribute or
|
||||
argument, and TensorFlow supports a wide range of data types for different use
|
||||
cases.
|
||||
|
||||
The basic, single data types of `tf.dtypes <https://www.tensorflow.org/api_docs/python/tf/dtypes>`__
|
||||
are as follows:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
- Since ROCm
|
||||
* - ``bfloat16``
|
||||
- 16-bit bfloat (brain floating point).
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``bool``
|
||||
- Boolean.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``complex128``
|
||||
- 128-bit complex.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``complex64``
|
||||
- 64-bit complex.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``double``
|
||||
- 64-bit (double precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``float16``
|
||||
- 16-bit (half precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``float32``
|
||||
- 32-bit (single precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``float64``
|
||||
- 64-bit (double precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``half``
|
||||
- 16-bit (half precision) floating-point.
|
||||
- 2.0.0
|
||||
- 2.0
|
||||
* - ``int16``
|
||||
- Signed 16-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``int32``
|
||||
- Signed 32-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``int64``
|
||||
- Signed 64-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``int8``
|
||||
- Signed 8-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``qint16``
|
||||
- Signed quantized 16-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``qint32``
|
||||
- Signed quantized 32-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``qint8``
|
||||
- Signed quantized 8-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``quint16``
|
||||
- Unsigned quantized 16-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``quint8``
|
||||
- Unsigned quantized 8-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``resource``
|
||||
- Handle to a mutable, dynamically allocated resource.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``string``
|
||||
- Variable-length string, represented as byte array.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``uint16``
|
||||
- Unsigned 16-bit (word) integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``uint32``
|
||||
- Unsigned 32-bit (dword) integer.
|
||||
- 1.5.0
|
||||
- 1.7
|
||||
* - ``uint64``
|
||||
- Unsigned 64-bit (qword) integer.
|
||||
- 1.5.0
|
||||
- 1.7
|
||||
* - ``uint8``
|
||||
- Unsigned 8-bit (byte) integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``variant``
|
||||
- Data of arbitrary type (known at runtime).
|
||||
- 1.4.0
|
||||
- 1.7
|
||||
|
||||
Features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This table provides an overview of key features in TensorFlow and their
|
||||
availability in ROCm.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
- Since ROCm
|
||||
* - ``tf.linalg`` (Linear Algebra)
|
||||
- Operations for matrix and tensor computations, such as
|
||||
``tf.linalg.matmul`` (matrix multiplication), ``tf.linalg.inv``
|
||||
(matrix inversion) and ``tf.linalg.cholesky`` (Cholesky decomposition).
|
||||
These leverage GPUs for high-performance linear algebra operations.
|
||||
- 1.4
|
||||
- 1.8.2
|
||||
* - ``tf.nn`` (Neural Network Operations)
|
||||
- GPU-accelerated building blocks for deep learning models, such as 2D
|
||||
convolutions with ``tf.nn.conv2d``, max pooling operations with
|
||||
``tf.nn.max_pool``, activation functions like ``tf.nn.relu`` or softmax
|
||||
for output layers with ``tf.nn.softmax``.
|
||||
- 1.0
|
||||
- 1.8.2
|
||||
* - ``tf.image`` (Image Processing)
|
||||
- GPU-accelerated functions for image preprocessing and augmentations,
|
||||
such as resize images with ``tf.image.resize``, flip images horizontally
|
||||
with ``tf.image.flip_left_right`` and adjust image brightness randomly
|
||||
with ``tf.image.random_brightness``.
|
||||
- 1.1
|
||||
- 1.8.2
|
||||
* - ``tf.keras`` (High-Level API)
|
||||
- GPU acceleration for Keras layers and models, including dense layers
|
||||
(``tf.keras.layers.Dense``), convolutional layers
|
||||
(``tf.keras.layers.Conv2D``) and recurrent layers
|
||||
(``tf.keras.layers.LSTM``).
|
||||
- 1.4
|
||||
- 1.8.2
|
||||
* - ``tf.math`` (Mathematical Operations)
|
||||
- GPU-accelerated mathematical operations, such as sum across dimensions
|
||||
with ``tf.math.reduce_sum``, elementwise exponentiation with
|
||||
``tf.math.exp`` and sigmoid activation (``tf.math.sigmoid``).
|
||||
- 1.5
|
||||
- 1.8.2
|
||||
* - ``tf.signal`` (Signal Processing)
|
||||
- Functions for spectral analysis and signal transformations.
|
||||
- 1.13
|
||||
- 2.1
|
||||
* - ``tf.data`` (Data Input Pipeline)
|
||||
- GPU-accelerated data preprocessing for efficient input pipelines,
|
||||
Prefetching with ``tf.data.experimental.AUTOTUNE``. GPU-enabled
|
||||
transformations like map and batch.
|
||||
- 1.4
|
||||
- 1.8.2
|
||||
* - ``tf.distribute`` (Distributed Training)
|
||||
- Enabling to scale computations across multiple devices on a single
|
||||
machine or across multiple machines.
|
||||
- 1.13
|
||||
- 2.1
|
||||
* - ``tf.random`` (Random Number Generation)
|
||||
- GPU-accelerated random number generation
|
||||
- 1.12
|
||||
- 1.9.2
|
||||
* - ``tf.TensorArray`` (Dynamic Array Operations)
|
||||
- Enables dynamic tensor manipulation on GPUs.
|
||||
- 1.0
|
||||
- 1.8.2
|
||||
* - ``tf.sparse`` (Sparse Tensor Operations)
|
||||
- GPU-accelerated sparse matrix manipulations.
|
||||
- 1.9
|
||||
- 1.9.0
|
||||
* - ``tf.experimental.numpy``
|
||||
- GPU-accelerated NumPy-like API for numerical computations.
|
||||
- 2.4
|
||||
- 4.1.1
|
||||
* - ``tf.RaggedTensor``
|
||||
- Handling of variable-length sequences and ragged tensors with GPU
|
||||
support.
|
||||
- 1.13
|
||||
- 2.1
|
||||
* - ``tf.function`` with XLA (Accelerated Linear Algebra)
|
||||
- Enable GPU-accelerated functions in optimization.
|
||||
- 1.14
|
||||
- 2.4
|
||||
* - ``tf.quantization``
|
||||
- Quantized operations for inference, accelerated on GPUs.
|
||||
- 1.12
|
||||
- 1.9.2
|
||||
|
||||
Distributed library features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Enables developers to scale computations across multiple devices on a single machine or
|
||||
across multiple machines.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
- Since ROCm
|
||||
* - ``MultiWorkerMirroredStrategy``
|
||||
- Synchronous training across multiple workers using mirrored variables.
|
||||
- 2.0
|
||||
- 3.0
|
||||
* - ``MirroredStrategy``
|
||||
- Synchronous training across multiple GPUs on one machine.
|
||||
- 1.5
|
||||
- 2.5
|
||||
* - ``TPUStrategy``
|
||||
- Efficiently trains models on Google TPUs.
|
||||
- 1.9
|
||||
- ❌
|
||||
* - ``ParameterServerStrategy``
|
||||
- Asynchronous training using parameter servers for variable management.
|
||||
- 2.1
|
||||
- 4.0
|
||||
* - ``CentralStorageStrategy``
|
||||
- Keeps variables on a single device and performs computation on multiple
|
||||
devices.
|
||||
- 2.3
|
||||
- 4.1
|
||||
* - ``CollectiveAllReduceStrategy``
|
||||
- Synchronous training across multiple devices and hosts.
|
||||
- 1.14
|
||||
- 3.5
|
||||
* - Distribution Strategies API
|
||||
- High-level API to simplify distributed training configuration and
|
||||
execution.
|
||||
- 1.10
|
||||
- 3.0
|
||||
|
||||
Unsupported TensorFlow features
|
||||
===============================================================================
|
||||
|
||||
The following are GPU-accelerated TensorFlow features not currently supported by
|
||||
ROCm.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
* - Mixed Precision with TF32
|
||||
- Mixed precision with TF32 is used for matrix multiplications,
|
||||
convolutions, and other linear algebra operations, particularly in
|
||||
deep learning workloads like CNNs and transformers.
|
||||
- 2.4
|
||||
* - ``tf.distribute.TPUStrategy``
|
||||
- Efficiently trains models on Google TPUs.
|
||||
- 1.9
|
||||
|
||||
Use cases and recommendations
|
||||
===============================================================================
|
||||
|
||||
* The `Training a Neural Collaborative Filtering (NCF) Recommender on an AMD
|
||||
GPU <https://rocm.blogs.amd.com/artificial-intelligence/ncf/README.html>`__
|
||||
blog post discusses training an NCF recommender system using TensorFlow. It
|
||||
explains how NCF improves traditional collaborative filtering methods by
|
||||
leveraging neural networks to model non-linear user-item interactions. The
|
||||
post outlines the implementation using the recommenders library, focusing on
|
||||
the use of implicit data (for example, user interactions like viewing or
|
||||
purchasing) and how it addresses challenges like the lack of negative values.
|
||||
|
||||
* The `Creating a PyTorch/TensorFlow code environment on AMD GPUs
|
||||
<https://rocm.blogs.amd.com/software-tools-optimization/pytorch-tensorflow-env/README.html>`__
|
||||
blog post provides instructions for creating a machine learning environment
|
||||
for PyTorch and TensorFlow on AMD GPUs using ROCm. It covers steps like
|
||||
installing the libraries, cloning code repositories, installing dependencies,
|
||||
and troubleshooting potential issues with CUDA-based code. Additionally, it
|
||||
explains how to HIPify code (port CUDA code to HIP) and manage Docker images
|
||||
for a better experience on AMD GPUs. This guide aims to help data scientists
|
||||
and ML practitioners adapt their code for AMD GPUs.
|
||||
|
||||
For more use cases and recommendations, see the `ROCm Tensorflow blog posts <https://rocm.blogs.amd.com/blog/tag/tensorflow.html>`__.
|
||||
@@ -1,86 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: verl compatibility
|
||||
:keywords: GPU, verl compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
verl compatibility
|
||||
*******************************************************************************
|
||||
|
||||
Volcano Engine Reinforcement Learning for LLMs (verl) is a reinforcement learning framework designed for large language models (LLMs).
|
||||
verl offers a scalable, open-source fine-tuning solution optimized for AMD Instinct GPUs with full ROCm support.
|
||||
|
||||
* See the `verl documentation <https://verl.readthedocs.io/en/latest/>`_ for more information about verl.
|
||||
* The official verl GitHub repository is `https://github.com/volcengine/verl <https://github.com/volcengine/verl>`_.
|
||||
* Use the AMD-validated :ref:`Docker images <verl-docker-compat>` with ROCm and verl preinstalled.
|
||||
* See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>` to install and get started.
|
||||
|
||||
.. note::
|
||||
|
||||
verl is supported on ROCm 6.2.0.
|
||||
|
||||
.. _verl-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
The benefits of verl in large-scale reinforcement learning from human feedback (RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`_ blog.
|
||||
|
||||
.. _verl-supported_features:
|
||||
|
||||
Supported features
|
||||
===============================================================================
|
||||
|
||||
The following table shows verl on ROCm support for GPU-accelerated modules.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Description
|
||||
- verl version
|
||||
- ROCm version
|
||||
* - ``FSDP``
|
||||
- Training engine
|
||||
- 0.3.0.post0
|
||||
- 6.2.0
|
||||
* - ``vllm``
|
||||
- Inference engine
|
||||
- 0.3.0.post0
|
||||
- 6.2.0
|
||||
|
||||
.. _verl-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories represent the available verl versions from the official Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- verl
|
||||
- Ubuntu
|
||||
- Pytorch
|
||||
- Python
|
||||
- vllm
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
|
||||
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`_
|
||||
- `0.3.0post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`_
|
||||
- 20.04
|
||||
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`_
|
||||
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`_
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user