mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 22:58:17 -05:00
Compare commits
183 Commits
falcon
...
docs/7.0-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02180e7bf7 | ||
|
|
b66ce0b0a6 | ||
|
|
ac00f08a03 | ||
|
|
4667fbfc83 | ||
|
|
7cb2143e15 | ||
|
|
dc56ba46b6 | ||
|
|
56d74cb542 | ||
|
|
f92dde618b | ||
|
|
9e7a2e711c | ||
|
|
bca2c255bb | ||
|
|
fbfc0bf70d | ||
|
|
8f24297ac5 | ||
|
|
481157e567 | ||
|
|
cc4262185a | ||
|
|
cc7a042501 | ||
|
|
d26a9c8162 | ||
|
|
edf400789a | ||
|
|
7d5abf2dae | ||
|
|
c895490f80 | ||
|
|
95285875d0 | ||
|
|
596a120566 | ||
|
|
684fd6102f | ||
|
|
fb8d86df6a | ||
|
|
da26a1c9cb | ||
|
|
df84dadd43 | ||
|
|
bc6d48ef94 | ||
|
|
4728b201a6 | ||
|
|
a7abd5f67c | ||
|
|
f7e670d36c | ||
|
|
6cebe026b1 | ||
|
|
2134313c4a | ||
|
|
08e5cb4321 | ||
|
|
22732e81a2 | ||
|
|
975c4036a4 | ||
|
|
6df8002b08 | ||
|
|
0f261049bb | ||
|
|
8c7378ba71 | ||
|
|
dcc949f441 | ||
|
|
a4b1b2cc67 | ||
|
|
4f592f8949 | ||
|
|
ac2df2961d | ||
|
|
f20e8dec8b | ||
|
|
10e9157f39 | ||
|
|
a2ce6021cb | ||
|
|
2196fc9a2f | ||
|
|
925689f89e | ||
|
|
91a541f8b9 | ||
|
|
34f8d57ece | ||
|
|
55f95adc7c | ||
|
|
e05b1702d8 | ||
|
|
4179042cf7 | ||
|
|
ae2de81b79 | ||
|
|
efd6cec4a4 | ||
|
|
b65996587f | ||
|
|
7b7eaf69f2 | ||
|
|
4cfc8ddad2 | ||
|
|
97ebbb227d | ||
|
|
8c6a1726fe | ||
|
|
2656143c9e | ||
|
|
7910841c94 | ||
|
|
30fec8f74a | ||
|
|
1923f801e0 | ||
|
|
d69037bfcc | ||
|
|
7ac6aa4084 | ||
|
|
14f3c42320 | ||
|
|
67be6f6249 | ||
|
|
2502fc5bcf | ||
|
|
61c6749a10 | ||
|
|
8e8104c811 | ||
|
|
cfb3504d77 | ||
|
|
3602bc5142 | ||
|
|
cf4a8ecf28 | ||
|
|
a5aae151b7 | ||
|
|
685457834a | ||
|
|
1c715356b6 | ||
|
|
ca8df59ba8 | ||
|
|
ad1ac5a4e8 | ||
|
|
e2d0f4a362 | ||
|
|
f0bef19f15 | ||
|
|
204032493b | ||
|
|
934dd0892c | ||
|
|
9c38a9cf71 | ||
|
|
894f137cda | ||
|
|
d331b19ede | ||
|
|
1416c355e3 | ||
|
|
c5f9be0375 | ||
|
|
769eee92bf | ||
|
|
c6baf14252 | ||
|
|
87d5a210c1 | ||
|
|
830f2d5edf | ||
|
|
1dd4b4230d | ||
|
|
5346748889 | ||
|
|
2e32d1d3d4 | ||
|
|
9cff634d8c | ||
|
|
3f3d592e2b | ||
|
|
e12996054e | ||
|
|
ff11bd392e | ||
|
|
53d3e092d3 | ||
|
|
6b5586fd2c | ||
|
|
b8e115d56a | ||
|
|
c1919faccd | ||
|
|
231dc6be36 | ||
|
|
6142df329b | ||
|
|
2addcb0bca | ||
|
|
106cecba5e | ||
|
|
d62d12fd55 | ||
|
|
bae5bdd177 | ||
|
|
8afca4af80 | ||
|
|
0c3e2ea01d | ||
|
|
086089128e | ||
|
|
6999c24402 | ||
|
|
93fd0ef1d4 | ||
|
|
daf2e980d9 | ||
|
|
f10d80f6cc | ||
|
|
272c9eabc9 | ||
|
|
a62560342e | ||
|
|
21a4565e02 | ||
|
|
394e3ffe11 | ||
|
|
2eb8bf4963 | ||
|
|
9dbc10b4c5 | ||
|
|
cebf0f5975 | ||
|
|
0acb457389 | ||
|
|
9e23c2ea2b | ||
|
|
080a7339f0 | ||
|
|
932d6f551b | ||
|
|
fef80c324d | ||
|
|
5ffc336620 | ||
|
|
a6c5fc4a2c | ||
|
|
32939eed40 | ||
|
|
454331ba59 | ||
|
|
637174f644 | ||
|
|
934ce63840 | ||
|
|
230b01565f | ||
|
|
505041d90a | ||
|
|
3ba79f9431 | ||
|
|
714b5395f8 | ||
|
|
9ed65a81c4 | ||
|
|
6d9f430c70 | ||
|
|
7697298f5d | ||
|
|
854bd268bf | ||
|
|
f1f2b3cac2 | ||
|
|
e265ee53ba | ||
|
|
3f56efcb3b | ||
|
|
98fde2bff1 | ||
|
|
0e8b745266 | ||
|
|
02a8a6e5df | ||
|
|
f118318f98 | ||
|
|
47e4ec8b3a | ||
|
|
58a62bc00e | ||
|
|
56d258592d | ||
|
|
8dc7016405 | ||
|
|
8686bca1b4 | ||
|
|
82d15a09f5 | ||
|
|
42e0c0cfba | ||
|
|
ddcad120a2 | ||
|
|
b8892f2c33 | ||
|
|
ab384a1b6e | ||
|
|
27db6ef0b3 | ||
|
|
707d6c022f | ||
|
|
3bafe307bf | ||
|
|
ca5d0d0000 | ||
|
|
e35efbae09 | ||
|
|
0d7846fbab | ||
|
|
92a9c88fe3 | ||
|
|
2a3c2fe5aa | ||
|
|
156917e15d | ||
|
|
d7a9280008 | ||
|
|
c1825ba41c | ||
|
|
0a77e7b3a5 | ||
|
|
a940f3f090 | ||
|
|
95415d5e70 | ||
|
|
d1772b9ca3 | ||
|
|
f65e1412df | ||
|
|
ea1072b11d | ||
|
|
90a651d2b6 | ||
|
|
16978a382b | ||
|
|
dc23bb09c2 | ||
|
|
bb7af3351a | ||
|
|
8ef1bb0139 | ||
|
|
1610837a95 | ||
|
|
b7ce573c66 | ||
|
|
186c281aba | ||
|
|
d44ea40a0d |
33
.azuredevops/ci-builds/mathlibs-trigger.yml
Normal file
33
.azuredevops/ci-builds/mathlibs-trigger.yml
Normal file
@@ -0,0 +1,33 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml@pipelines_repo
|
||||
|
||||
parameters:
|
||||
- name: pipelinesRepoRef
|
||||
type: string
|
||||
default: refs/heads/develop
|
||||
- name: librariesRepoRef
|
||||
type: string
|
||||
default: refs/heads/develop
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
ref: ${{ parameters.pipelinesRepoRef }}
|
||||
- repository: libraries_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/rocm-libraries
|
||||
ref: ${{ parameters.librariesRepoRef }}
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: /.azuredevops/ci-builds/mathlibs.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: libraries_repo
|
||||
buildDependsOn: false
|
||||
38
.azuredevops/ci-builds/mathlibs.yml
Normal file
38
.azuredevops/ci-builds/mathlibs.yml
Normal file
@@ -0,0 +1,38 @@
|
||||
# entrypoint for kicking off a unified build of the mathlibs
|
||||
# this template is designed to be called by another pipeline (llvm, clr, etc.)
|
||||
# `buildDependsOn` will need to be set when calling this template
|
||||
# passes a `unifiedBuild` param to downstream pipelines, which will prevent duplicate jobs
|
||||
# logic needs to be added in individual mathlib pipelines for handling `unifiedBuild`
|
||||
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: monorepo
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: false
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocRAND:
|
||||
name: rocRAND
|
||||
sparseCheckoutDir: projects/rocrand
|
||||
- rocPRIM:
|
||||
name: rocPRIM
|
||||
sparseCheckoutDir: projects/rocprim
|
||||
- hipBLAS-common:
|
||||
name: hipBLAS-common
|
||||
sparseCheckoutDir: projects/hipblas-common
|
||||
# - composable_kernel:
|
||||
# name: composable_kernel
|
||||
# sparseCheckoutDir: projects/composablekernel
|
||||
|
||||
jobs:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ parameters.buildDependsOn }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: true
|
||||
@@ -20,7 +20,7 @@ parameters:
|
||||
- ocl-icd-libopencl1
|
||||
- ocl-icd-opencl-dev
|
||||
- opencl-headers
|
||||
- python3-pip
|
||||
- zlib1g-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -41,118 +41,148 @@ parameters:
|
||||
# any changes for clr should just trigger HIP pipeline
|
||||
# similarly for hipother repo, for Nvidia backend
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
# HIP with AMD backend
|
||||
jobs:
|
||||
- job: hip_clr_combined_amd
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: 'clr/build'
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=amd
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: amd
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hip_clr_combined_${{ job.os }}_amd
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=amd
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: amd
|
||||
|
||||
# HIP with Nvidia backend
|
||||
- job: hip_clr_combined_nvidia
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
|
||||
displayName: 'Artifact listing'
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: 'clr/build'
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=nvidia
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=OFF
|
||||
-DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: nvidia
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: nvidia
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hip_clr_combined_${{ job.os }}_nvidia
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
|
||||
displayName: 'Artifact listing'
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=nvidia
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=OFF
|
||||
-DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: nvidia
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: nvidia
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: HIPIFY
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -13,112 +16,140 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- cuda-toolkit-12-9
|
||||
- libcudnn9-dev-cuda-12
|
||||
- libnuma-dev
|
||||
- mesa-common-dev
|
||||
- ninja-build
|
||||
- python-is-python3
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- python-is-python3
|
||||
- mesa-common-dev
|
||||
- ccache
|
||||
- cuda-toolkit
|
||||
- cudnn
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- lit
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- llvm-project
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: HIPIFY
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: UPSTREAM_LLVM_GIT_URL
|
||||
value: https://github.com/llvm/llvm-project.git
|
||||
- name: UPSTREAM_LLVM_TAG
|
||||
value: llvmorg-18.1.2
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: 'Register CUDA packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo rm -f cuda-keyring_1.1-1_all.deb
|
||||
sudo apt update
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: git clone upstream llvm-project
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone $(UPSTREAM_LLVM_GIT_URL) --depth=1 --branch $(UPSTREAM_LLVM_TAG) --recurse-submodules
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- script: |
|
||||
mkdir -p $(CCACHE_DIR)
|
||||
echo "##vso[task.prependpath]/usr/lib/ccache:/usr/local/cuda/bin"
|
||||
displayName: Update path for cuda and ccache
|
||||
- task: Cache@2
|
||||
displayName: Ccache caching
|
||||
inputs:
|
||||
key: HIPIFY | $(Agent.OS) | "$(UPSTREAM_LLVM_TAG)"
|
||||
path: $(CCACHE_DIR)
|
||||
restoreKeys: HIPIFY | $(Agent.OS)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: upstream-llvm
|
||||
cmakeBuildDir: $(Pipeline.Workspace)/llvm-project/llvm/build
|
||||
installDir: $(Pipeline.Workspace)/llvm
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DLLVM_ENABLE_PROJECTS=clang
|
||||
-DLLVM_INCLUDE_TESTS=OFF
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-GNinja
|
||||
- task: Bash@3
|
||||
displayName: python install lit
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo python3 $(Pipeline.Workspace)/llvm-project/llvm/utils/lit/setup.py install
|
||||
- task: Bash@3
|
||||
displayName: install FileCheck
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: cp $(Pipeline.Workspace)/llvm-project/llvm/build/bin/FileCheck $(Pipeline.Workspace)/llvm/bin
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: HIPIFY
|
||||
extraBuildFlags: >-
|
||||
-DHIPIFY_CLANG_TESTS=ON
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/targets/x86_64-linux
|
||||
-DCUDA_DNN_ROOT_DIR=/usr/local/cuda/targets/x86_64-linux
|
||||
-DCMAKE_PREFIX_PATH=$(Pipeline.Workspace)/llvm;/usr/local/cuda/targets/x86_64-linux/lib
|
||||
-DLLVM_EXTERNAL_LIT=$(Pipeline.Workspace)/llvm-project/llvm/build/bin/llvm-lit
|
||||
multithreadFlag: -- -j32
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: HIPIFY
|
||||
testDir: $(Build.SourcesDirectory)/build
|
||||
testExecutable: make
|
||||
testParameters: test-hipify
|
||||
testPublishResults: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: combined
|
||||
registerCUDAPackages: true
|
||||
extraCopyDirectories:
|
||||
- llvm-project
|
||||
extraEnvVars:
|
||||
- UPSTREAM_LLVM_GIT_URL:::https://github.com/llvm/llvm-project.git
|
||||
- UPSTREAM_LLVM_TAG:::llvmorg-18.1.2
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
name: rocm-ci_medium_build_pool_2404
|
||||
${{ else }}:
|
||||
name: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: 'Register CUDA packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo rm -f cuda-keyring_1.1-1_all.deb
|
||||
sudo apt update
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
script: |
|
||||
sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- task: Bash@3
|
||||
displayName: Add lit to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
site_packages=$(python3 -m site --user-base)/bin
|
||||
sudo ln -sf $site_packages/bin/lit $(Pipeline.Workspace)/llvm-lit
|
||||
echo "##vso[task.prependpath]$site_packages"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# cutensor is not available from apt or dnf
|
||||
- task: Bash@3
|
||||
displayName: 'Download and install cutensor'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
wget -q --show-progress https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/linux-x86_64/libcutensor-linux-x86_64-2.2.0.0-archive.tar.xz
|
||||
tar -xvJf libcutensor-linux-x86_64-*.tar.xz
|
||||
mkdir -p $(Pipeline.Workspace)/cutensor
|
||||
cp -r libcutensor-linux-x86_64-*/* $(Pipeline.Workspace)/cutensor/
|
||||
- task: Bash@3
|
||||
displayName: 'List downloaded CUDA files'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ls -la1R /usr/local/cuda-12.9
|
||||
# script: cp $(Pipeline.Workspace)/llvm-project/llvm/build/bin/FileCheck $(Pipeline.Workspace)/llvm/bin
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;/usr/local/cuda/targets/x86_64-linux/lib
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DHIPIFY_CLANG_TESTS=ON
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.9
|
||||
-DCUDA_DNN_ROOT_DIR=/usr/local/cuda-12.9
|
||||
-DCUDA_CUB_ROOT_DIR=/usr/local/cuda-12.9/targets/x86_64-linux/include/cub
|
||||
-DCUDA_TENSOR_ROOT_DIR=$(Pipeline.Workspace)/cutensor/
|
||||
multithreadFlag: -- -j32
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
# parameters:
|
||||
# componentName: HIPIFY
|
||||
# testDir: $(Build.SourcesDirectory)/build
|
||||
# testExecutable: make
|
||||
# testParameters: -j 32 test-hipify
|
||||
# testPublishResults: false
|
||||
# os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: combined
|
||||
registerCUDAPackages: true
|
||||
extraCopyDirectories:
|
||||
- llvm-project
|
||||
|
||||
@@ -16,6 +16,7 @@ parameters:
|
||||
- cmake
|
||||
- jq
|
||||
- libdrm-dev
|
||||
- libmsgpack-dev
|
||||
- libsqlite3-dev
|
||||
- libstdc++-12-dev
|
||||
- ninja-build
|
||||
|
||||
@@ -43,18 +43,20 @@ parameters:
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- AMDMIGraphX
|
||||
- clr
|
||||
- half
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- llvm-project
|
||||
- MIOpen
|
||||
- rocBLAS
|
||||
- rocDecode
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- half
|
||||
- rocBLAS
|
||||
- MIOpen
|
||||
- AMDMIGraphX
|
||||
- ROCR-Runtime
|
||||
- rpp
|
||||
- rocDecode
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -90,8 +92,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -20,7 +20,6 @@ parameters:
|
||||
- libnuma-dev
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -36,51 +35,65 @@ parameters:
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: ROCR_Runtime_build
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_SHARED_LIBS=ON
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ROCR_Runtime_build_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_SHARED_LIBS=ON
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ROCR_Runtime_test_${{ job.target }}
|
||||
dependsOn: ROCR_Runtime_build
|
||||
- job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ROCR_Runtime_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -97,6 +110,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- task: Bash@3
|
||||
displayName: Install libhwloc5
|
||||
inputs:
|
||||
@@ -107,12 +121,15 @@ jobs:
|
||||
sudo apt install -y --allow-downgrades ./libhwloc5_1.11.12-3_amd64.deb ./libhwloc-dev_1.11.12-3_amd64.deb
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
@@ -121,11 +138,13 @@ jobs:
|
||||
runRocminfo: false
|
||||
- task: Bash@3
|
||||
displayName: Build kfdtest
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest
|
||||
script: |
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
|
||||
make
|
||||
@@ -135,13 +154,16 @@ jobs:
|
||||
testExecutable: BIN_DIR=$(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
|
||||
testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Build rocrtst
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
|
||||
script: |
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
|
||||
export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
|
||||
mkdir build && cd build
|
||||
@@ -159,6 +181,7 @@ jobs:
|
||||
testExecutable: ./rocrtst64
|
||||
testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Build.SourcesDirectory)/rocrtst/suites/test_common/build/${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -15,7 +15,6 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -24,37 +23,57 @@ parameters:
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
- job: ROCdbgapi
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ROCdbgapi_build_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: ROCgdb
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -15,6 +18,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- bison
|
||||
- cmake
|
||||
- dejagnu
|
||||
- flex
|
||||
- libbabeltrace-dev
|
||||
@@ -22,8 +26,10 @@ parameters:
|
||||
- libgmp-dev
|
||||
- liblzma-dev
|
||||
- libmpfr-dev
|
||||
- pkg-config
|
||||
- ncurses-dev
|
||||
- pkg-config
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- texinfo
|
||||
- zlib1g-dev
|
||||
- name: rocmDependencies
|
||||
@@ -39,45 +45,53 @@ parameters:
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildTestJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildTestJobs }}:
|
||||
- job: ROCgdb_build_test_${{ job.target }}
|
||||
condition:
|
||||
and(
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PKG_CONFIG_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/share/pkgconfig
|
||||
pool: ${{ job.target }}_test_pool
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
name: rocm-ci_medium_build_pool_2404
|
||||
${{ else }}:
|
||||
name: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
configureFlags: >-
|
||||
--program-prefix=roc
|
||||
--enable-64-bit-bfd
|
||||
@@ -100,7 +114,74 @@ jobs:
|
||||
LDFLAGS="-Wl,--enable-new-dtags,-rpath=$(Agent.BuildDirectory)/rocm/lib"
|
||||
makeCallPrefix: LD_RUN_PATH='${ORIGIN}/../lib'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PKG_CONFIG_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/share/pkgconfig
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
configureFlags: >-
|
||||
--program-prefix=roc
|
||||
--enable-64-bit-bfd
|
||||
--enable-targets="x86_64-linux-gnu,amdgcn-amd-amdhsa"
|
||||
--disable-ld
|
||||
--disable-gas
|
||||
--disable-gdbserver
|
||||
--disable-sim
|
||||
--enable-tui
|
||||
--disable-gdbtk
|
||||
--disable-shared
|
||||
--disable-gprofng
|
||||
--with-expat
|
||||
--with-system-zlib
|
||||
--without-guile
|
||||
--with-babeltrace
|
||||
--with-lzma
|
||||
--with-python=python3
|
||||
--with-rocm-dbgapi=$(Agent.BuildDirectory)/rocm
|
||||
LDFLAGS="-Wl,--enable-new-dtags,-rpath=$(Agent.BuildDirectory)/rocm/lib"
|
||||
makeCallPrefix: LD_RUN_PATH='${ORIGIN}/../lib'
|
||||
- task: Bash@3
|
||||
displayName: Setup test environment
|
||||
inputs:
|
||||
@@ -109,14 +190,15 @@ jobs:
|
||||
# Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
echo "##vso[task.prependpath]/opt/rocm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- task: Bash@3
|
||||
displayName: check-gdb
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: make check-gdb TESTS=gdb.rocm/simple.exp
|
||||
script: |
|
||||
${{ iif(eq(job.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
make check-gdb TESTS=gdb.rocm/simple.exp
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: print gdb log
|
||||
|
||||
@@ -27,6 +27,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
- aomp
|
||||
- clr
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
@@ -43,6 +44,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
- aomp
|
||||
- clr
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
@@ -108,6 +110,7 @@ jobs:
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/llvm/include
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$(Build.BinariesDirectory)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: Tensile
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -13,7 +32,6 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- python3-pip
|
||||
- cmake
|
||||
- libmsgpack-dev
|
||||
- libboost-program-options-dev
|
||||
@@ -38,75 +56,97 @@ parameters:
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: Tensile_build
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- task: Bash@3
|
||||
displayName: Create wheel file
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 setup.py bdist_wheel
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Build.SourcesDirectory)/dist
|
||||
contentsString: '*.whl'
|
||||
targetDir: $(Build.ArtifactStagingDirectory)
|
||||
clean: false
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'wheel file Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetPath: $(Build.ArtifactStagingDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Save pipeline artifact file names
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: |
|
||||
whlFile=$(find "$(Build.ArtifactStagingDirectory)" -type f -name "*.whl" | head -n 1)
|
||||
if [ -n "$whlFile" ]; then
|
||||
echo $(basename "$whlFile") >> pipelineArtifacts.txt
|
||||
fi
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn: ${{ parameters.buildDependsOn[job.target] }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Create wheel file
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 setup.py bdist_wheel
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
- task: Bash@3
|
||||
displayName: Rename wheel file with job OS
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
script: |
|
||||
wheelFile=$(find "$(Agent.BuildDirectory)/s/dist" -type f -name "*.whl" | head -n 1)
|
||||
newWheelFile="$(basename "$wheelFile" .whl)-${{ job.os }}.whl"
|
||||
mv "$wheelFile" "$(dirname "$wheelFile")/$newWheelFile"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Agent.BuildDirectory)/s/dist
|
||||
contentsString: '*.whl'
|
||||
targetDir: $(Build.ArtifactStagingDirectory)
|
||||
clean: false
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'wheel file Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetPath: $(Build.ArtifactStagingDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Save pipeline artifact file names
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: |
|
||||
whlFile=$(find "$(Build.ArtifactStagingDirectory)" -type f -name "*.whl" | head -n 1)
|
||||
if [ -n "$whlFile" ]; then
|
||||
echo $(basename "$whlFile") >> pipelineArtifacts.txt
|
||||
fi
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: Tensile_test_${{ job.target }}
|
||||
- job: Tensile_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 180
|
||||
dependsOn: Tensile_build
|
||||
dependsOn: Tensile_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -126,20 +166,23 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
inputs:
|
||||
itemPattern: '**/*.whl'
|
||||
itemPattern: '**/*${{ job.os }}*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: pip install
|
||||
@@ -164,7 +207,7 @@ jobs:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: tox run -v -e ci -- -m pre_checkin
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -16,50 +16,66 @@ parameters:
|
||||
- cmake
|
||||
- libdrm-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- pkg-config
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: amdsmi_build
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DBUILD_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: amdsmi_build_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
${{ else }}:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DBUILD_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: amdsmi_test_${{ job.target }}
|
||||
dependsOn: amdsmi_build
|
||||
- job: amdsmi_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: amdsmi_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -76,8 +92,11 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
@@ -85,8 +104,9 @@ jobs:
|
||||
parameters:
|
||||
componentName: amdsmi
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/share/amd_smi/tests/amdsmitst'
|
||||
testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: aomp
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -15,170 +18,187 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- bison
|
||||
- ccache
|
||||
- cmake
|
||||
- python3-pip
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
- libpci-dev
|
||||
- libnuma-dev
|
||||
- libffi-dev
|
||||
- git
|
||||
- libopenmpi-dev
|
||||
- flex
|
||||
- gawk
|
||||
- git
|
||||
- mesa-common-dev
|
||||
- libtool
|
||||
- ninja-build
|
||||
- libbabeltrace-dev
|
||||
- libbison-dev
|
||||
- libdrm-amdgpu1
|
||||
- libdrm-dev
|
||||
- libdw-dev
|
||||
- libgtest-dev
|
||||
- libsystemd-dev
|
||||
- libffi-dev
|
||||
- libgmp-dev
|
||||
- liblzma-dev
|
||||
- libmpfr-dev
|
||||
- libncurses5-dev
|
||||
- libnuma-dev
|
||||
- libopenmpi-dev
|
||||
- libpci-dev
|
||||
- libssl-dev
|
||||
- libstdc++-12-dev
|
||||
- ccache
|
||||
- libgmp-dev
|
||||
- libmpfr-dev
|
||||
- texinfo
|
||||
- libbison-dev
|
||||
- bison
|
||||
- flex
|
||||
- libbabeltrace-dev
|
||||
- libncurses5-dev
|
||||
- liblzma-dev
|
||||
- python3-setuptools
|
||||
- python3-dev
|
||||
- libsystemd-dev
|
||||
- libtool
|
||||
- libudev-dev
|
||||
- parallel
|
||||
# Referencing comment snippet.
|
||||
#
|
||||
# snippet from https://github.com/ROCm/aomp/blob/aomp-dev/bin/build_aomp.sh#L131-L134
|
||||
#
|
||||
# For ROCM build (AOMP_STANDALONE_BUILD=0) the components roct, rocr,
|
||||
# libdevice, project, comgr, rocminfo, hipamd, rocdbgapi, rocgdb,
|
||||
# roctracer, rocprofiler, rocm_smi_lib, and amdsmi should be found
|
||||
# in ROCM in /opt/rocm. The ROCM build only needs these components:
|
||||
- pkg-config
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- python3-setuptools
|
||||
- texinfo
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- ROCdbgapi
|
||||
- ROCgdb
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocminfo
|
||||
- rocm_smi_lib
|
||||
- rocprofiler
|
||||
- rocprofiler-register
|
||||
- rocprofiler-sdk
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
- rocprofiler-register
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: aomp
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# checkout the repos tied to openmp-extras, plus llvm-project
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: aomp-extras_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: flang_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: llvm-project_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: extras
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/aomp-extras/build'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
extraBuildFlags: >-
|
||||
-DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAOMP_STANDALONE_BUILD=0
|
||||
-DAOMP_VERSION_STRING=9.99.99
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: openmp
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/openmp/build'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DOPENMP_ENABLE_LIBOMPTARGET=1
|
||||
-DLIBOMP_COPY_EXPORTS=OFF
|
||||
-DLIBOMP_OMPT_SUPPORT=ON
|
||||
-DLIBOMP_OMPD_SUPPORT=ON
|
||||
-DCMAKE_SKIP_INSTALL_RPATH=TRUE
|
||||
-DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
|
||||
-DLIBOMP_FORTRAN_MODULES_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/flang
|
||||
-DLIBOMP_MODULES_INSTALL_PATH=$(Build.BinariesDirectory)/llvm/include/flang/
|
||||
-GNinja
|
||||
- task: Bash@3
|
||||
displayName: 'ROCm symbolic link'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: offload
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/offload/build'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_SKIP_INSTALL_RPATH=TRUE
|
||||
-DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
|
||||
-DLIBOMPTARGET_LLVM_INCLUDE_DIRS=$(Build.SourcesDirectory)/llvm-project/llvm/include
|
||||
-DCMAKE_EXE_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
name: rocm-ci_medium_build_pool_2404
|
||||
${{ else }}:
|
||||
name: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# checkout the repos tied to openmp-extras, plus llvm-project
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: aomp-extras_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: flang_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: llvm-project_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
componentName: extras
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/aomp-extras/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/aomp-extras'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
extraBuildFlags: >-
|
||||
-DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAOMP_STANDALONE_BUILD=0
|
||||
-DAOMP_VERSION_STRING=9.99.99
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: openmp
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/openmp/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/llvm-project/openmp'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DOPENMP_ENABLE_LIBOMPTARGET=1
|
||||
-DLIBOMP_COPY_EXPORTS=OFF
|
||||
-DLIBOMP_OMPD_SUPPORT=ON
|
||||
-DCMAKE_SKIP_INSTALL_RPATH=TRUE
|
||||
-DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
|
||||
-DLIBOMP_FORTRAN_MODULES_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/flang
|
||||
-DLIBOMP_MODULES_INSTALL_PATH=$(Build.BinariesDirectory)/llvm/include/flang/
|
||||
multithreadFlag: -- -j32
|
||||
- task: Bash@3
|
||||
displayName: 'ROCm symbolic link'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: offload
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/offload/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/llvm-project/offload'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_SKIP_INSTALL_RPATH=TRUE
|
||||
-DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
|
||||
-DLIBOMPTARGET_LLVM_INCLUDE_DIRS=$(Build.SourcesDirectory)/llvm-project/llvm/include
|
||||
-DCMAKE_EXE_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: aomp_test_${{ job.target }}
|
||||
dependsOn: aomp
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -195,12 +215,16 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: ROCm symbolic link
|
||||
inputs:
|
||||
@@ -212,7 +236,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: aomp-extras_repo
|
||||
# these copy steps are from the aomp prototype script for test prep
|
||||
# these copy steps are from the aomp prototype script for test prep
|
||||
- task: CopyFiles@2
|
||||
displayName: 'Copy AOMP contents'
|
||||
inputs:
|
||||
|
||||
@@ -1,34 +1,42 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
copyJobs:
|
||||
- { os: ubuntu2204, backend: amd }
|
||||
- { os: almalinux8, backend: amd }
|
||||
- { os: ubuntu2204, backend: nvidia }
|
||||
- { os: almalinux8, backend: nvidia }
|
||||
|
||||
# hip and clr are tightly-coupled
|
||||
# run this same template for both repos
|
||||
# any changes for clr should just trigger HIP pipeline
|
||||
jobs:
|
||||
- job: hip_clr_combined
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
# checkout nothing, just copy artifacts from triggering HIP job
|
||||
# and then publish for this clr job or for this hipother job to maintain latest
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml
|
||||
parameters:
|
||||
componentName: HIP
|
||||
pipelineId: $(HIP_PIPELINE_ID)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Agent.BuildDirectory)/rocm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ each job in parameters.jobMatrix.copyJobs }}:
|
||||
- job: hip_clr_combined_${{ job.os }}_${{ job.backend }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
# checkout nothing, just copy artifacts from triggering HIP job
|
||||
# and then publish for this clr job or for this hipother job to maintain latest
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml
|
||||
parameters:
|
||||
componentName: HIP
|
||||
pipelineId: $(HIP_PIPELINE_ID)
|
||||
fileFilter: ${{ job.os }}*${{ job.backend }}
|
||||
- task: Bash@3
|
||||
displayName: Copy HIP artifacts
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: cp -a $(Agent.BuildDirectory)/rocm/* $(Build.BinariesDirectory)/
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipBLAS-common
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -14,54 +33,103 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- git
|
||||
- ninja-build
|
||||
- wget
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - hipBLASLt:
|
||||
# name: hipBLASLt
|
||||
# sparseCheckoutDir: projects/hipblaslt
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - hipBLAS_common_build
|
||||
|
||||
jobs:
|
||||
- job: hipBLAS_common
|
||||
variables:
|
||||
- group: common
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# extraEnvVars:
|
||||
# - ROCM_PATH:::/home/user/workspace/rocm
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hipBLAS_common_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# extraEnvVars:
|
||||
# - ROCM_PATH:::/home/user/workspace/rocm
|
||||
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipBLASLt
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -13,6 +32,8 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- ccache
|
||||
- gfortran
|
||||
- git
|
||||
- libdrm-dev
|
||||
- libmsgpack-dev
|
||||
@@ -20,9 +41,6 @@ parameters:
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- gfortran
|
||||
- libblas-dev
|
||||
- ccache
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -37,6 +55,7 @@ parameters:
|
||||
- hipBLAS-common
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocm-cmake
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
@@ -58,20 +77,37 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - rocBLAS:
|
||||
# name: rocBLAS
|
||||
# sparseCheckoutDir: projects/rocblas
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - hipBLASLt_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hipBLASLt_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 300
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -86,6 +122,10 @@ jobs:
|
||||
- name: DAY_STRING
|
||||
value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
|
||||
pool: ${{ variables.ULTRA_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -93,17 +133,22 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
@@ -111,22 +156,20 @@ jobs:
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
# Build and install gtest, lapack, hipBLAS-common
|
||||
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
|
||||
# $(Pipeline.Workspace)/s/deps is part of the hipBLASLt repo
|
||||
- script: mkdir $(Pipeline.Workspace)/deps
|
||||
displayName: Create temp folder for external dependencies
|
||||
# hipBLASLt already has a CMake script for external deps, so we can just run that
|
||||
# https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
|
||||
- script: cmake $(Pipeline.Workspace)/s/deps
|
||||
displayName: Configure hipBLASLt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
- script: make
|
||||
displayName: Build hipBLASLt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
- script: sudo make install
|
||||
displayName: Install hipBLASLt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
# hipBLASLt has a script for gtest and lapack
|
||||
# https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
|
||||
# $(Agent.BuildDirectory)/deps is a temporary folder for the build process
|
||||
# $(Agent.BuildDirectory)/s/deps is part of the hipBLASLt repo
|
||||
- task: Bash@3
|
||||
displayName: Build and install external dependencies
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
mkdir -p $(Agent.BuildDirectory)/deps
|
||||
cd $(Agent.BuildDirectory)/deps
|
||||
cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
|
||||
make
|
||||
sudo make install
|
||||
- script: |
|
||||
mkdir -p $(CCACHE_DIR)
|
||||
echo "##vso[task.prependpath]/usr/lib/ccache"
|
||||
@@ -134,93 +177,117 @@ jobs:
|
||||
- task: Cache@2
|
||||
displayName: Ccache caching
|
||||
inputs:
|
||||
key: hipBLASLt | $(Agent.OS) | ${{ job.target }} | $(DAY_STRING) | $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
key: hipBLASLt | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING) | $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
path: $(CCACHE_DIR)
|
||||
restoreKeys: |
|
||||
hipBLASLt | $(Agent.OS) | ${{ job.target }} | $(DAY_STRING)
|
||||
hipBLASLt | $(Agent.OS) | ${{ job.target }}
|
||||
hipBLASLt | $(Agent.OS)
|
||||
hipBLASLt | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING)
|
||||
hipBLASLt | ${{ job.os }} | ${{ job.target }}
|
||||
hipBLASLt | ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DTensile_LOGIC=
|
||||
-DTensile_CPU_THREADS=
|
||||
-DTensile_LIBRARY_FORMAT=msgpack
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
|
||||
installLatestCMake: true
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/amdclang
|
||||
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
extraCopyDirectories:
|
||||
- deps
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
|
||||
installLatestCMake: true
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/amdclang
|
||||
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
extraCopyDirectories:
|
||||
- deps
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipBLASLt_test_${{ job.target }}
|
||||
timeoutInMinutes: 300
|
||||
dependsOn: hipBLASLt_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipBLASLt
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipblaslt-test'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 300
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipblaslt-test'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipCUB
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -14,9 +33,8 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- libgtest-dev
|
||||
- git
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -33,103 +51,143 @@ parameters:
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hipCUB_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DBUILD_BENCHMARK=ON
|
||||
-DBUILD_TEST=ON
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
extraCxxFlags: -Wno-deprecated-declarations
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipCUB_test_${{ job.target }}
|
||||
dependsOn: hipCUB_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipCUB
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hipcub'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hipcub'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipFFT
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -61,7 +80,11 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hipFFT_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.target }} # todo: add OS
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -79,12 +102,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -102,9 +128,11 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -113,8 +141,8 @@ jobs:
|
||||
# gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipFFT_test_${{ job.target }}
|
||||
dependsOn: hipFFT_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -134,6 +162,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
@@ -141,10 +170,12 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipFFT
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './hipfft-test'
|
||||
testParameters: '--test_prob 0.002 --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipRAND
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -14,18 +33,18 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- googletest
|
||||
- git
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocRAND
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -33,110 +52,168 @@ parameters:
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- rocRAND
|
||||
- ROCR-Runtime
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - rocFFT:
|
||||
# name: rocFFT
|
||||
# sparseCheckoutDir: projects/rocfft
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - hipRAND_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hipRAND_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DBUILD_TEST=ON
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# gpuTarget: ${{ job.target }}
|
||||
# extraEnvVars:
|
||||
# - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipRAND_test_${{ job.target }}
|
||||
dependsOn: hipRAND_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipRAND
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hipRAND'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hipRAND'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -92,7 +92,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: external
|
||||
cmakeBuildDir: 'deps/build'
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/deps/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/deps'
|
||||
installDir: '$(Pipeline.Workspace)/deps-install'
|
||||
extraBuildFlags: >-
|
||||
-DBUILD_BOOST=OFF
|
||||
|
||||
@@ -14,142 +14,188 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- python3-pip
|
||||
- libnuma-dev
|
||||
- ninja-build
|
||||
- python-is-python3
|
||||
- zlib1g-dev
|
||||
- pkg-config
|
||||
- python-is-python3
|
||||
- python3-pip
|
||||
- zlib1g-dev
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
- job: llvm_project
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_DEVICE_LIB_PATH
|
||||
value: '$(Build.BinariesDirectory)/amdgcn/bitcode'
|
||||
- name: HIP_PATH
|
||||
value: '$(Agent.BuildDirectory)/rocm'
|
||||
pool: ${{ variables.ULTRA_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
skipLlvmSymlink: true
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocm-llvm
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Build.BinariesDirectory)/llvm;$(Build.BinariesDirectory)"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DLLVM_ENABLE_PROJECTS=clang;lld;clang-tools-extra;mlir;flang
|
||||
-DLLVM_ENABLE_RUNTIMES=compiler-rt;libunwind;libcxx;libcxxabi
|
||||
-DCLANG_ENABLE_AMDCLANG=ON
|
||||
-DLLVM_TARGETS_TO_BUILD=AMDGPU;X86
|
||||
-DLIBCXX_ENABLE_SHARED=OFF
|
||||
-DLIBCXX_ENABLE_STATIC=ON
|
||||
-DLIBCXX_INSTALL_LIBRARY=OFF
|
||||
-DLIBCXX_INSTALL_HEADERS=OFF
|
||||
-DLIBCXXABI_ENABLE_SHARED=OFF
|
||||
-DLIBCXXABI_ENABLE_STATIC=ON
|
||||
-DLIBCXXABI_INSTALL_STATIC_LIBRARY=OFF
|
||||
-DLLVM_BUILD_DOCS=OFF
|
||||
-DLLVM_ENABLE_SPHINX=OFF
|
||||
-DLLVM_ENABLE_ASSERTIONS=OFF
|
||||
-DLLVM_ENABLE_Z3_SOLVER=OFF
|
||||
-DLLVM_ENABLE_ZLIB=ON
|
||||
-DCLANG_DEFAULT_LINKER=lld
|
||||
-DCLANG_DEFAULT_RTLIB=compiler-rt
|
||||
-DCLANG_DEFAULT_UNWINDLIB=libgcc
|
||||
-DSANITIZER_AMDGPU=OFF
|
||||
-DPACKAGE_VENDOR=AMD
|
||||
-DCLANG_LINK_FLANG_LEGACY=ON
|
||||
-DCMAKE_CXX_STANDARD=17
|
||||
-DROCM_LLVM_BACKWARD_COMPAT_LINK=$(Build.BinariesDirectory)/llvm
|
||||
-DROCM_LLVM_BACKWARD_COMPAT_LINK_TARGET=./lib/llvm
|
||||
-GNinja
|
||||
cmakeBuildDir: 'llvm/build'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
# use llvm-lit to run unit tests for llvm, clang, and lld
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: check-llvm
|
||||
testDir: 'llvm/build'
|
||||
testExecutable: './bin/llvm-lit'
|
||||
testParameters: '-q --xunit-xml-output=llvm_test_output.xml --filter-out="live-debug-values-spill-tracking" ./test'
|
||||
testOutputFile: llvm_test_output.xml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: check-clang
|
||||
testDir: 'llvm/build'
|
||||
testExecutable: './bin/llvm-lit'
|
||||
testParameters: '-q --xunit-xml-output=clang_test_output.xml ./tools/clang/test'
|
||||
testOutputFile: clang_test_output.xml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: check-lld
|
||||
testDir: 'llvm/build'
|
||||
testExecutable: './bin/llvm-lit'
|
||||
testParameters: '-q --xunit-xml-output=lld_test_output.xml ./tools/lld/test'
|
||||
testOutputFile: lld_test_output.xml
|
||||
- task: CopyFiles@2
|
||||
displayName: Copy FileCheck for Publishing
|
||||
inputs:
|
||||
CleanTargetFolder: false
|
||||
SourceFolder: llvm/build/bin
|
||||
Contents: FileCheck
|
||||
TargetFolder: $(Build.BinariesDirectory)/llvm/bin
|
||||
retryCount: 3
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: device-libs
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
cmakeBuildDir: 'amd/device-libs/build'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: comgr
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build;$(Build.SourcesDirectory)/amd/device-libs/build"
|
||||
-DCOMGR_DISABLE_SPIRV=1
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
cmakeBuildDir: 'amd/comgr/build'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: comgr
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit comgr_test_output.xml'
|
||||
testDir: 'amd/comgr/build'
|
||||
testOutputFile: comgr_test_output.xml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: hipcc
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DHIPCC_BACKWARD_COMPATIBILITY=OFF
|
||||
cmakeBuildDir: 'amd/hipcc/build'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: combined
|
||||
extraEnvVars:
|
||||
- HIP_DEVICE_LIB_PATH:::/home/user/workspace/bin/amdgcn/bitcode
|
||||
- HIP_PATH:::/home/user/workspace/rocm
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: llvm_project_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
name: 'rocm-ci_high_build_pool_2404' #temporarily using 'high' pool while 'ultra' is down
|
||||
${{ else }}:
|
||||
name: 'rocm-ci_ultra_build_pool'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_DEVICE_LIB_PATH
|
||||
value: '$(Build.BinariesDirectory)/amdgcn/bitcode'
|
||||
- name: HIP_PATH
|
||||
value: '$(Agent.BuildDirectory)/rocm'
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
skipLlvmSymlink: true
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocm-llvm
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Build.BinariesDirectory)/llvm;$(Build.BinariesDirectory)"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DLLVM_ENABLE_PROJECTS=clang;lld;clang-tools-extra;mlir;flang
|
||||
-DLLVM_ENABLE_RUNTIMES=compiler-rt;libunwind;libcxx;libcxxabi
|
||||
-DCLANG_ENABLE_AMDCLANG=ON
|
||||
-DLLVM_TARGETS_TO_BUILD=AMDGPU;X86
|
||||
-DLIBCXX_ENABLE_SHARED=OFF
|
||||
-DLIBCXX_ENABLE_STATIC=ON
|
||||
-DLIBCXX_INSTALL_LIBRARY=OFF
|
||||
-DLIBCXX_INSTALL_HEADERS=OFF
|
||||
-DLIBCXXABI_ENABLE_SHARED=OFF
|
||||
-DLIBCXXABI_ENABLE_STATIC=ON
|
||||
-DLIBCXXABI_INSTALL_STATIC_LIBRARY=OFF
|
||||
-DLLVM_BUILD_DOCS=OFF
|
||||
-DLLVM_ENABLE_SPHINX=OFF
|
||||
-DLLVM_ENABLE_ASSERTIONS=OFF
|
||||
-DLLVM_ENABLE_Z3_SOLVER=OFF
|
||||
-DLLVM_ENABLE_ZLIB=ON
|
||||
-DCLANG_DEFAULT_LINKER=lld
|
||||
-DCLANG_DEFAULT_RTLIB=compiler-rt
|
||||
-DCLANG_DEFAULT_UNWINDLIB=libgcc
|
||||
-DSANITIZER_AMDGPU=OFF
|
||||
-DPACKAGE_VENDOR=AMD
|
||||
-DCLANG_LINK_FLANG_LEGACY=ON
|
||||
-DCMAKE_CXX_STANDARD=17
|
||||
-DROCM_LLVM_BACKWARD_COMPAT_LINK=$(Build.BinariesDirectory)/llvm
|
||||
-DROCM_LLVM_BACKWARD_COMPAT_LINK_TARGET=./lib/llvm
|
||||
-GNinja
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/llvm'
|
||||
installDir: '$(Build.BinariesDirectory)/llvm'
|
||||
# use llvm-lit to run unit tests for llvm, clang, and lld
|
||||
- task: Bash@3
|
||||
displayName: 'Copy llvm-lit to install directory'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
cp $(Build.SourcesDirectory)/llvm/build/bin/llvm-lit $(Build.BinariesDirectory)/llvm/bin/
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: check-llvm
|
||||
testDir: 'llvm/build'
|
||||
testExecutable: './bin/llvm-lit'
|
||||
testParameters: '-q --xunit-xml-output=llvm_test_output.xml --filter-out="live-debug-values-spill-tracking" ./test'
|
||||
testOutputFile: llvm_test_output.xml
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: check-clang
|
||||
testDir: 'llvm/build'
|
||||
testExecutable: './bin/llvm-lit'
|
||||
testParameters: '-q --xunit-xml-output=clang_test_output.xml ./tools/clang/test'
|
||||
testOutputFile: clang_test_output.xml
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: check-lld
|
||||
testDir: 'llvm/build'
|
||||
testExecutable: './bin/llvm-lit'
|
||||
testParameters: '-q --xunit-xml-output=lld_test_output.xml ./tools/lld/test'
|
||||
testOutputFile: lld_test_output.xml
|
||||
os: ${{ job.os }}
|
||||
- task: CopyFiles@2
|
||||
displayName: Copy FileCheck for Publishing
|
||||
inputs:
|
||||
CleanTargetFolder: false
|
||||
SourceFolder: llvm/build/bin
|
||||
Contents: FileCheck
|
||||
TargetFolder: $(Build.BinariesDirectory)/llvm/bin
|
||||
retryCount: 3
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: device-libs
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/amd/device-libs/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/amd/device-libs'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: comgr
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build;$(Build.SourcesDirectory)/amd/device-libs/build"
|
||||
-DCOMGR_DISABLE_SPIRV=1
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/amd/comgr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/amd/comgr'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: comgr
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit comgr_test_output.xml'
|
||||
testDir: 'amd/comgr/build'
|
||||
testOutputFile: comgr_test_output.xml
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: hipcc
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DHIPCC_BACKWARD_COMPATIBILITY=OFF
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/amd/hipcc/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/amd/hipcc'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: combined
|
||||
extraEnvVars:
|
||||
- HIP_DEVICE_LIB_PATH:::/home/user/workspace/bin/amdgcn/bitcode
|
||||
- HIP_PATH:::/home/user/workspace/rocm
|
||||
|
||||
@@ -15,7 +15,6 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- googletest
|
||||
- libboost-program-options-dev
|
||||
- libdrm-dev
|
||||
- libfftw3-dev
|
||||
@@ -90,6 +89,10 @@ jobs:
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
submoduleBehaviour: recursive
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -101,12 +104,11 @@ jobs:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_TESTS=ON
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/share/rocm/cmake;$(Agent.BuildDirectory)/rocm/libexec/hipify
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
|
||||
@@ -105,6 +105,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
|
||||
cmakeSourceDir: $(Build.SourcesDirectory)/grpc
|
||||
installDir: $(Build.SourcesDirectory)/bin
|
||||
extraBuildFlags: >-
|
||||
-DgRPC_INSTALL=ON
|
||||
|
||||
@@ -125,6 +125,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: PyBind11
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/pybind11/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/pybind11'
|
||||
customInstallPath: false
|
||||
installEnabled: false
|
||||
extraBuildFlags: >-
|
||||
@@ -141,6 +142,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: RapidJSON
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/rapidjson/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/rapidjson'
|
||||
customInstallPath: false
|
||||
installEnabled: false
|
||||
extraBuildFlags: >-
|
||||
@@ -200,7 +202,6 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm/include/rocal
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
demands: firstRenderDeviceAccess
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocBLAS
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -64,19 +83,43 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# # rocSOLVER depends on both rocBLAS and rocPRIM
|
||||
# # for a unified build, rocBLAS will be the one to call rocSOLVER
|
||||
# - rocSOLVER:
|
||||
# name: rocSOLVER
|
||||
# sparseCheckoutDir: projects/rocsolver
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# unifiedBuild:
|
||||
# downstreamAggregateNames: rocBLAS+rocPRIM
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# - rocPRIM_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocBLAS_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -89,6 +132,10 @@ jobs:
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -96,19 +143,26 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||
@@ -128,63 +182,94 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
installAOCL: true
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
|
||||
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
installAOCL: true
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
|
||||
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocBLAS_test_${{ job.target }}
|
||||
dependsOn: rocBLAS_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocBLAS
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocblas-test'
|
||||
testParameters: '--yaml rocblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocblas-test'
|
||||
testParameters: '--yaml rocblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
# ${{ if parameters.unifiedBuild }}:
|
||||
# buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
|
||||
# ${{ else }}:
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocDecode
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -13,29 +16,28 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- python3-pip
|
||||
- cmake
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
- ffmpeg
|
||||
- libavcodec-dev
|
||||
- libavformat-dev
|
||||
- libavutil-dev
|
||||
- libdrm-dev
|
||||
- libstdc++-12-dev
|
||||
- libva-amdgpu-dev
|
||||
- mesa-amdgpu-va-drivers
|
||||
- libdrm-dev
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- rocminfo
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -48,53 +50,70 @@ parameters:
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: rocDecode_build
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# registerROCmPackages: true
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# registerROCmPackages: true
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocDecode_test_${{ job.target }}
|
||||
dependsOn: rocDecode_build
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -108,27 +127,33 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
demands: firstRenderDeviceAccess
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Build rocDecode tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
${{ iif(eq(job.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
mkdir rocDecode-tests
|
||||
cd rocDecode-tests
|
||||
cmake $(Agent.BuildDirectory)/rocm/share/rocdecode/test
|
||||
@@ -137,6 +162,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: rocDecode
|
||||
testDir: 'rocDecode-tests'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocFFT
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -59,10 +78,23 @@ parameters:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
# - name: downstreamComponentMatrix
|
||||
# type: object
|
||||
# default:
|
||||
# - hipFFT:
|
||||
# name: hipFFT
|
||||
# sparseCheckoutDir: projects/hipfft
|
||||
# skipUnifiedBuild: 'false'
|
||||
# buildDependsOn:
|
||||
# - rocFFT_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocFFT_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_ubuntu2204_${{ job.target }} # todo: un-hardcode OS
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -79,12 +111,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -101,9 +136,11 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -114,8 +151,8 @@ jobs:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocFFT_test_${{ job.target }}
|
||||
dependsOn: rocFFT_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -135,6 +172,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
@@ -142,10 +180,12 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocFFT
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocfft-test'
|
||||
testParameters: '--test_prob 0.004 --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
@@ -154,3 +194,15 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
# - ${{ if parameters.triggerDownstreamJobs }}:
|
||||
# - ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
# parameters:
|
||||
# checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
# buildDependsOn: ${{ component.buildDependsOn }}
|
||||
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
# triggerDownstreamJobs: true
|
||||
# unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocJPEG
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -44,32 +47,44 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocJPEG_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
name: rocm-ci_medium_build_pool_2404
|
||||
${{ else }}:
|
||||
name: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
@@ -80,17 +95,26 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
@@ -99,8 +123,8 @@ jobs:
|
||||
# registerROCmPackages: true
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocJPEG_test_${{ job.target }}
|
||||
dependsOn: rocJPEG_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -114,29 +138,34 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
demands: firstRenderDeviceAccess
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Build rocJPEG tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
${{ iif(eq(job.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
mkdir rocJPEG-tests
|
||||
cd rocJPEG-tests
|
||||
cmake $(Agent.BuildDirectory)/rocm/share/rocjpeg/test
|
||||
@@ -145,6 +174,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: rocJPEG
|
||||
testDir: 'rocJPEG-tests'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocPRIM
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -14,18 +33,17 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- libgtest-dev
|
||||
- git
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -39,96 +57,175 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 1, shardCount: 3 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 2, shardCount: 3 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 3, shardCount: 3 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a, shard: 1, shardCount: 3 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a, shard: 2, shardCount: 3 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a, shard: 3, shardCount: 3 }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocThrust:
|
||||
name: rocThrust
|
||||
sparseCheckoutDir: projects/rocthrust
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocPRIM_build
|
||||
- hipCUB:
|
||||
name: hipCUB
|
||||
sparseCheckoutDir: projects/hipcub
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocPRIM_build
|
||||
# rocSOLVER depends on both rocBLAS and rocPRIM
|
||||
# for a unified build, rocBLAS will be the one to call rocSOLVER
|
||||
# - rocSOLVER:
|
||||
# name: rocSOLVER
|
||||
# sparseCheckoutDir: projects/rocsolver
|
||||
# skipUnifiedBuild: 'true'
|
||||
# buildDependsOn:
|
||||
# - rocPRIM_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocPRIM_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_BENCHMARK=ON
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DBUILD_BENCHMARK=ON
|
||||
-DBUILD_TEST=ON
|
||||
-GNinja
|
||||
extraCxxFlags: -Wno-deprecated-declarations
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocPRIM_test_${{ job.target }}
|
||||
dependsOn: rocPRIM_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocPRIM
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}_shard_${{ job.shard }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
|
||||
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -168,7 +168,6 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
demands: firstRenderDeviceAccess
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocRAND
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -15,18 +34,16 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- googletest
|
||||
- libgtest-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -40,56 +57,96 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipRAND:
|
||||
name: hipRAND
|
||||
sparseCheckoutDir: projects/hiprand
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocRAND_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocRAND_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
${{ else }}:
|
||||
name: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_TEST=ON
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DBUILD_TEST=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
@@ -98,42 +155,63 @@ jobs:
|
||||
# extraEnvVars:
|
||||
# - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocRAND_test_${{ job.target }}
|
||||
dependsOn: rocRAND_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocRAND
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocRAND'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocRAND'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocSOLVER
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -26,14 +45,12 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- hipSPARSE
|
||||
- llvm-project
|
||||
- rocBLAS
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- ROCR-Runtime
|
||||
- rocSPARSE
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -55,33 +72,47 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocSOLVER_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- task: Bash@3
|
||||
displayName: 'Clone lapack'
|
||||
inputs:
|
||||
@@ -92,11 +123,15 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: lapack
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_Fortran_FLAGS=-fno-optimize-sibling-calls
|
||||
@@ -105,9 +140,11 @@ jobs:
|
||||
-DLAPACKE=OFF
|
||||
-GNinja
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/lapack/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/lapack'
|
||||
installDir: '$(Pipeline.Workspace)/deps-install'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
@@ -119,56 +156,71 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraCopyDirectories:
|
||||
- deps-install
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraCopyDirectories:
|
||||
- deps-install
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocSOLVER_test_${{ job.target }}
|
||||
dependsOn: rocSOLVER_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocSOLVER
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocsolver-test'
|
||||
testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rocsolver-test'
|
||||
testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocThrust
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -14,18 +33,17 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
- libboost-program-options-dev
|
||||
- googletest
|
||||
- libfftw3-dev
|
||||
- git
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- hipRAND
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- ROCR-Runtime
|
||||
@@ -36,104 +54,142 @@ parameters:
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- ROCR-Runtime
|
||||
- hipRAND
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocThrust_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-GNinja
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DBUILD_TEST=ON
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocThrust_test_${{ job.target }}
|
||||
dependsOn: rocThrust_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocThrust
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocthrust'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocthrust'
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "scan.hip"'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -16,8 +16,6 @@ parameters:
|
||||
- doxygen
|
||||
- doxygen-doc
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-sphinx
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -25,49 +23,75 @@ parameters:
|
||||
- cmake==3.20.5
|
||||
- ninja
|
||||
- rocm-docs-core
|
||||
- sphinx
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
- job: rocm_cmake
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- task: Bash@3
|
||||
displayName: Add CMake to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(python3 -m site --user-base)/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
- task: Bash@3
|
||||
displayName: CTest setup
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
python -m pip install -r $(Build.SourcesDirectory)/docs/requirements.txt
|
||||
python -m pip install -r $(Build.SourcesDirectory)/test/docsphinx/docs/.sphinx/requirements.txt
|
||||
git config --global user.email "you@example.com"
|
||||
git config --global user.name "Your Name"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm-cmake
|
||||
testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: combined
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_cmake_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
${{ else }}:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- task: Bash@3
|
||||
displayName: Add CMake to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(python3 -m site --user-base)/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
- task: Bash@3
|
||||
displayName: CTest setup
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
python -m pip install -r $(Build.SourcesDirectory)/docs/requirements.txt
|
||||
python -m pip install -r $(Build.SourcesDirectory)/test/docsphinx/docs/.sphinx/requirements.txt
|
||||
git config --global user.email "you@example.com"
|
||||
git config --global user.name "Your Name"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm-cmake
|
||||
testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: combined
|
||||
|
||||
@@ -15,39 +15,61 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
- job: rocm_core
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CURRENT_BINARY_DIR=$PWD
|
||||
-DCMAKE_CURRENT_SOURCE_DIR=$PWD/../
|
||||
-DCMAKE_VERBOSE_MAKEFILE=1
|
||||
-DCPACK_GENERATOR=DEB
|
||||
-DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99"
|
||||
-DCPACK_RPM_PACKAGE_RELEASE="local.9999"
|
||||
-DROCM_VERSION="$(NEXT_RELEASE_VERSION)"
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_core_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
${{ else }}:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CURRENT_BINARY_DIR=$PWD
|
||||
-DCMAKE_CURRENT_SOURCE_DIR=$PWD/../
|
||||
-DCMAKE_VERBOSE_MAKEFILE=1
|
||||
-DCPACK_GENERATOR=DEB
|
||||
-DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99"
|
||||
-DCPACK_RPM_PACKAGE_RELEASE="local.9999"
|
||||
-DROCM_VERSION="$(NEXT_RELEASE_VERSION)"
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -15,6 +15,7 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- libglfw3-dev
|
||||
- libmsgpack-dev
|
||||
- libtbb-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
@@ -183,6 +184,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: rocm-examples
|
||||
testDir: $(Build.SourcesDirectory)/build
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "rocfft_callback"'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -17,50 +17,66 @@ parameters:
|
||||
- libdrm-dev
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
- python3-pip
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: rocm_smi_lib_build
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DBUILD_TESTS=ON
|
||||
-DROCM_DEP_ROCMCORE=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_smi_lib_build_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
${{ else }}:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DBUILD_TESTS=ON
|
||||
-DROCM_DEP_ROCMCORE=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocm_smi_lib_test_${{ job.target }}
|
||||
dependsOn: rocm_smi_lib_build
|
||||
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: rocm_smi_lib_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -77,8 +93,11 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
@@ -86,8 +105,9 @@ jobs:
|
||||
parameters:
|
||||
componentName: rocm_smi_lib
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/share/rocm_smi/rsmitst_tests/rsmitst'
|
||||
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -17,7 +17,6 @@ parameters:
|
||||
- libdrm-amdgpu-dev
|
||||
- libdrm-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -32,49 +31,63 @@ parameters:
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- job: rocminfo
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
skipLlvmSymlink: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCRTST_BLD_TYPE=release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocminfo_build_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
skipLlvmSymlink: true
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCRTST_BLD_TYPE=release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocminfo_test_${{ job.target }}
|
||||
dependsOn: rocminfo
|
||||
dependsOn: rocminfo_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -91,14 +104,18 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
@@ -109,6 +126,7 @@ jobs:
|
||||
testExecutable: './rocm/bin/rocminfo'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_agent_enumerator
|
||||
@@ -116,6 +134,7 @@ jobs:
|
||||
testExecutable: './rocm/bin/rocm_agent_enumerator'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -24,24 +24,28 @@ parameters:
|
||||
default:
|
||||
- astunparse==1.6.2
|
||||
- colorlover
|
||||
- "dash>=1.12.0"
|
||||
- dash-bootstrap-components
|
||||
- dash-svg
|
||||
- "dash>=3.0.0"
|
||||
- kaleido==0.2.1
|
||||
- matplotlib
|
||||
- "numpy>=1.17.5"
|
||||
- "pandas>=1.4.3"
|
||||
- plotext
|
||||
- plotille
|
||||
- pymongo
|
||||
- pyyaml
|
||||
- tabulate
|
||||
- tqdm
|
||||
- dash-svg
|
||||
- dash-bootstrap-components
|
||||
- kaleido
|
||||
- setuptools
|
||||
- plotille
|
||||
- tabulate
|
||||
- textual
|
||||
- textual_plotext
|
||||
- textual-fspicker
|
||||
- tqdm
|
||||
- mock
|
||||
- pytest
|
||||
- pytest-cov
|
||||
- pytest-xdist
|
||||
- name: rocmDependencies
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
@@ -114,14 +118,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -165,14 +161,6 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Add en_US.UTF-8 locale
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale
|
||||
locale -a
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
@@ -184,9 +172,17 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
dependencySource: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Add en_US.UTF-8 locale
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale
|
||||
locale -a
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
|
||||
@@ -15,40 +15,62 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
- job: rocprofiler_register
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
|
||||
-DROCPROFILER_REGISTER_BUILD_TESTS=ON
|
||||
-DROCPROFILER_REGISTER_BUILD_SAMPLES=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# environment: combined
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_register_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
${{ else }}:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
|
||||
-DROCPROFILER_REGISTER_BUILD_TESTS=ON
|
||||
-DROCPROFILER_REGISTER_BUILD_SAMPLES=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# environment: combined
|
||||
|
||||
@@ -14,10 +14,12 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- build-essential
|
||||
- cmake
|
||||
- libdrm-amdgpu-dev
|
||||
- libdrm-dev
|
||||
- libdw-dev
|
||||
- libelf-dev
|
||||
- libsqlite3-dev
|
||||
- libva-dev
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
@@ -74,8 +76,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -167,7 +167,6 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
demands: firstRenderDeviceAccess
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocprofiler
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -15,7 +18,6 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- libgtest-dev
|
||||
- libdrm-dev
|
||||
- libdw-dev
|
||||
- libsystemd-dev
|
||||
@@ -26,13 +28,13 @@ parameters:
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- pyyaml==5.3.1
|
||||
- Cppheaderparser
|
||||
- websockets
|
||||
- matplotlib
|
||||
- lxml
|
||||
- barectf
|
||||
- Cppheaderparser
|
||||
- lxml
|
||||
- matplotlib
|
||||
- pandas
|
||||
- pyyaml==5.3.1
|
||||
- websockets
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -41,29 +43,33 @@ parameters:
|
||||
- ROCdbgapi
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocm_smi_lib
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -72,6 +78,10 @@ jobs:
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -79,46 +89,59 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_MODULE_PATH=$(Build.SourcesDirectory)/cmake_modules;$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_MODULE_PATH=$(Build.SourcesDirectory)/cmake_modules;$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/lib64/cmake;$(Agent.BuildDirectory)/rocm/lib64/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
-DENABLE_LDCONFIG=OFF
|
||||
-DUSE_PROF_API=1
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
multithreadFlag: -- -j32
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_test_${{ job.target }}
|
||||
dependsOn: rocprofiler_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -139,16 +162,21 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
@@ -157,12 +185,14 @@ jobs:
|
||||
testExecutable: ./run.sh
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV2
|
||||
testDir: $(Agent.BuildDirectory)/rocm
|
||||
testExecutable: share/rocprofiler/tests/runUnitTests
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: roctracer
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
@@ -18,7 +21,7 @@ parameters:
|
||||
- graphviz
|
||||
- libdrm-amdgpu-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- zlib1g-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -45,26 +48,32 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: roctracer_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -72,6 +81,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
@@ -83,21 +93,27 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
# the linker flags will not affect ubuntu2204 builds as the paths do not exist
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=release
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/lib64/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -108,8 +124,8 @@ jobs:
|
||||
# registerROCmPackages: true
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: roctracer_test_${{ job.target }}
|
||||
dependsOn: roctracer_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
@@ -127,17 +143,20 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
@@ -146,6 +165,7 @@ jobs:
|
||||
testParameters: ''
|
||||
testDir: $(Agent.BuildDirectory)
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -38,6 +38,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/grpc/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/grpc
|
||||
extraBuildFlags: >-
|
||||
-DgRPC_INSTALL=ON
|
||||
-DgRPC_BUILD_TESTS=OFF
|
||||
|
||||
@@ -11,35 +11,54 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- git
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
- job: gtest
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: 'git clone gtest'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone -b ${{ parameters.gtestVersion }} https://github.com/google/googletest --depth=1 --shallow-submodules --recurse-submodules
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/googletest/build
|
||||
extraBuildFlags: >-
|
||||
-DGTEST_FORCE_SHARED_CRT=ON
|
||||
-DCMAKE_DEBUG_POSTFIX=d
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: gtest_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone GTest ${{ parameters.gtestVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/google/googletest -b ${{ parameters.gtestVersion }} --depth=1 --shallow-submodules --recurse-submodules
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/googletest/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/googletest
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DGTEST_FORCE_SHARED_CRT=ON
|
||||
-DCMAKE_DEBUG_POSTFIX=d
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
|
||||
@@ -4,71 +4,71 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- build-essential
|
||||
- git
|
||||
- ninja-build
|
||||
- openjdk-8-jdk
|
||||
- ca-certificates
|
||||
- autoconf
|
||||
- bc
|
||||
- bridge-utils
|
||||
- build-essential
|
||||
- ca-certificates
|
||||
- ccache
|
||||
- devscripts
|
||||
- dkms
|
||||
- doxygen
|
||||
- fakeroot
|
||||
- ffmpeg
|
||||
- gfortran
|
||||
- git
|
||||
- gnutls-bin
|
||||
- libamd2
|
||||
- libavformat-dev
|
||||
- libblas3
|
||||
- libcamd2
|
||||
- libccolamd2
|
||||
- libcholmod3
|
||||
- libcolamd2
|
||||
- libdpkg-dev
|
||||
- libdpkg-perl
|
||||
- libdrm-amdgpu1
|
||||
- libdrm-dev
|
||||
- libelf-dev
|
||||
- libfreetype-dev
|
||||
- libgfortran5
|
||||
- libgomp1
|
||||
- libjpeg-dev
|
||||
- libjpeg-turbo-official
|
||||
- liblapack-dev
|
||||
- liblapack3
|
||||
- libmetis5
|
||||
- libncurses-dev
|
||||
- libnuma-dev
|
||||
- libopenblas-dev
|
||||
- libpth-dev
|
||||
- libquadmath0
|
||||
- libssh-dev
|
||||
- libstdc++-12-dev
|
||||
- libsuitesparseconfig5
|
||||
- libswscale-dev
|
||||
- libtinfo-dev
|
||||
- libunwind-dev
|
||||
- libwebp-dev
|
||||
- llvm-dev
|
||||
- ncurses-base
|
||||
- ninja-build
|
||||
- numactl
|
||||
- openjdk-8-jdk
|
||||
- python-is-python3
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- wget
|
||||
- ncurses-base
|
||||
- libncurses-dev
|
||||
- numactl
|
||||
- libnuma-dev
|
||||
- libssh-dev
|
||||
- libunwind-dev
|
||||
- llvm-dev
|
||||
- libpth-dev
|
||||
- qemu-kvm
|
||||
- re2c
|
||||
- subversion
|
||||
- fakeroot
|
||||
- autoconf
|
||||
- libgomp1
|
||||
- libtinfo-dev
|
||||
- libcholmod3
|
||||
- libsuitesparseconfig5
|
||||
- libstdc++-12-dev
|
||||
- python-is-python3
|
||||
- gfortran
|
||||
- libgfortran5
|
||||
- liblapack3
|
||||
- libblas3
|
||||
- libquadmath0
|
||||
- libmetis5
|
||||
- libamd2
|
||||
- libcamd2
|
||||
- libcolamd2
|
||||
- libccolamd2
|
||||
- libdrm-amdgpu1
|
||||
- ccache
|
||||
- wget
|
||||
- zip
|
||||
- libjpeg-turbo-official
|
||||
- libjpeg-dev
|
||||
- libwebp-dev
|
||||
- libfreetype-dev
|
||||
- gnutls-bin
|
||||
- ffmpeg
|
||||
- libopenblas-dev
|
||||
- liblapack-dev
|
||||
- libswscale-dev
|
||||
- libavformat-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- astunparse
|
||||
- "expecttest>=0.2.1"
|
||||
- "expecttest>=0.3.0"
|
||||
- hypothesis
|
||||
- numpy
|
||||
- psutil
|
||||
@@ -76,8 +76,8 @@ parameters:
|
||||
- requests
|
||||
- setuptools==75.8.0
|
||||
- types-dataclasses
|
||||
- "typing-extensions>=4.8.0"
|
||||
- "sympy>=1.13.0"
|
||||
- "typing-extensions>=4.10.0"
|
||||
- "sympy>=1.13.3"
|
||||
- filelock
|
||||
- networkx
|
||||
- jinja2
|
||||
@@ -97,36 +97,39 @@ parameters:
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocminfo
|
||||
- MIOpen
|
||||
- clr
|
||||
- hipBLAS
|
||||
- hipBLASLt
|
||||
- hipFFT
|
||||
- hipRAND
|
||||
- hipSOLVER
|
||||
- hipSPARSE
|
||||
- ROCR-Runtime
|
||||
- hipSPARSELt
|
||||
- llvm-project
|
||||
- MIOpen
|
||||
- rccl
|
||||
- rocBLAS
|
||||
- rocFFT
|
||||
- rocm-core
|
||||
- rocminfo
|
||||
- rocm_smi_lib
|
||||
- rocPRIM
|
||||
- rocprofiler-register
|
||||
- rocRAND
|
||||
- ROCR-Runtime
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
- roctracer
|
||||
- hipBLASLt
|
||||
- rocprofiler-register
|
||||
- rocm-core
|
||||
- rocPRIM
|
||||
# below are additional dependencies not called out by build script, but throw errors during cmake
|
||||
- composable_kernel
|
||||
- hipBLAS-common
|
||||
- hipCUB
|
||||
- rocThrust
|
||||
- hipBLAS-common
|
||||
- composable_kernel
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
# rocroller.so needed and is not included in the wheel
|
||||
- hipBLASLt
|
||||
- rocminfo
|
||||
# Reference on what tests to run for torchvision found in private repo:
|
||||
# https://github.com/ROCm/rocAutomation/blob/jenkins-pipelines/pytorch/pytorch_ci/test_pytorch_test1.sh#L54
|
||||
@@ -240,12 +243,6 @@ jobs:
|
||||
git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules
|
||||
sudo ln -s $(Build.SourcesDirectory)/builder /builder
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Temporarily Patch CK Submodule
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git pull origin develop
|
||||
workingDirectory: $(Build.SourcesDirectory)/pytorch/third_party/composable_kernel
|
||||
- task: Bash@3
|
||||
displayName: Install patchelf
|
||||
inputs:
|
||||
@@ -267,6 +264,11 @@ jobs:
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_rocm_magma.sh $(MAGMA_ROCM)
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install targeted typing_extensions for build
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: pip install --target=$(Build.SourcesDirectory)/pytorch/torch/.. typing_extensions
|
||||
- task: Bash@3
|
||||
displayName: Run ROCm Build Script
|
||||
inputs:
|
||||
@@ -281,7 +283,6 @@ jobs:
|
||||
PYTORCH_ROOT=$(PYTORCH_ROOT)
|
||||
CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
DESIRED_DEVTOOLSET=$(DESIRED_DEVTOOLSET)
|
||||
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
PYTORCH_BUILD_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)
|
||||
PYTORCH_BUILD_NUMBER=$(date -u +%Y%m%d)
|
||||
SKIP_ALL_TESTS=1
|
||||
@@ -322,8 +323,6 @@ jobs:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
TORCHVISION_PACKAGE_NAME=torchvision.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
PYTORCH_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
|
||||
BUILD_VERSION=$(cat $(Build.SourcesDirectory)/vision/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
|
||||
python3 setup.py bdist_wheel
|
||||
@@ -400,11 +399,9 @@ jobs:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
inputs:
|
||||
itemPattern: '**/*$(JOB_GPU_TARGET)*.whl'
|
||||
itemPattern: '**/*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
dependencySource: staging
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
|
||||
@@ -3,12 +3,21 @@ parameters:
|
||||
- name: jobList
|
||||
type: object
|
||||
default:
|
||||
- gfx942-staging:
|
||||
target: gfx942
|
||||
source: staging
|
||||
- gfx90a-staging:
|
||||
target: gfx90a
|
||||
source: staging
|
||||
- { os: ubuntu2204, target: gfx942, source: staging }
|
||||
- { os: ubuntu2204, target: gfx90a, source: staging }
|
||||
- { os: ubuntu2204, target: gfx1201, source: staging }
|
||||
- { os: ubuntu2204, target: gfx1100, source: staging }
|
||||
- { os: ubuntu2204, target: gfx1030, source: staging }
|
||||
- { os: ubuntu2404, target: gfx942, source: staging }
|
||||
- { os: ubuntu2404, target: gfx90a, source: staging }
|
||||
- { os: ubuntu2404, target: gfx1201, source: staging }
|
||||
- { os: ubuntu2404, target: gfx1100, source: staging }
|
||||
- { os: ubuntu2404, target: gfx1030, source: staging }
|
||||
- { os: almalinux8, target: gfx942, source: staging }
|
||||
- { os: almalinux8, target: gfx90a, source: staging }
|
||||
- { os: almalinux8, target: gfx1201, source: staging }
|
||||
- { os: almalinux8, target: gfx1100, source: staging }
|
||||
- { os: almalinux8, target: gfx1030, source: staging }
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -16,9 +25,9 @@ parameters:
|
||||
- amdsmi
|
||||
- aomp-extras
|
||||
- aomp
|
||||
- clr
|
||||
- composable_kernel
|
||||
- half
|
||||
- HIP
|
||||
- hip-tests
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
@@ -83,7 +92,7 @@ schedules:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobList }}:
|
||||
- job: rocm_nightly_${{ job.target }}_${{ job.source }}
|
||||
- job: rocm_nightly_${{ job.os }}_${{ job.target }}_${{ job.source }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -108,9 +117,9 @@ jobs:
|
||||
parameters:
|
||||
dependencySource: ${{ job.source }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
skipLibraryLinking: true
|
||||
skipLlvmSymlink: true
|
||||
- script: df -h
|
||||
displayName: System disk space after ROCm
|
||||
- script: du -sh $(Agent.BuildDirectory)/rocm
|
||||
|
||||
@@ -28,12 +28,22 @@ resources:
|
||||
endpoint: ROCm
|
||||
name: ROCm/hipother
|
||||
ref: ${{ parameters.checkoutRef }}
|
||||
pipelines:
|
||||
- pipeline: hip_pipeline
|
||||
source: \experimental\HIP
|
||||
trigger: true
|
||||
- pipeline: hipother_pipeline
|
||||
source: \experimental\hipother
|
||||
trigger: true
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/HIP.yml
|
||||
parameters:
|
||||
checkoutRepo: release_repo
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
- ${{ if eq(variables['Build.Reason'], 'ResourceTrigger') }}:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/copyHIP.yml@pipelines_repo
|
||||
- ${{ if ne(variables['Build.Reason'], 'ResourceTrigger') }}:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/HIP.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: release_repo
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
|
||||
@@ -19,36 +19,24 @@ parameters:
|
||||
default: false
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Set allowPartiallySucceededBuilds
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
if [[ ",$ALLOWED_PARTIAL_SUCCEED_BUILDS," == *",${{ parameters.componentName }},"* ]]; then
|
||||
echo "##vso[task.setvariable variable=allowPartiallySucceededBuilds;]true"
|
||||
else
|
||||
echo "##vso[task.setvariable variable=allowPartiallySucceededBuilds;]false"
|
||||
fi
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ parameters.componentName }}
|
||||
inputs:
|
||||
${{ if eq(parameters.aggregatePipeline, false) }}:
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
allowPartiallySucceededBuilds: true
|
||||
${{ if parameters.aggregatePipeline }}:
|
||||
buildType: 'current'
|
||||
${{ else }}:
|
||||
buildType: 'specific'
|
||||
project: ROCm-CI
|
||||
definition: ${{ parameters.pipelineId }}
|
||||
specificBuildWithTriggering: true
|
||||
itemPattern: '**/*${{ parameters.fileFilter }}*'
|
||||
# aomp is a special case, since the trigger file is under ROCm/ROCm instead of the component repo
|
||||
${{ if notIn(parameters.componentName, 'aomp') }}:
|
||||
buildVersionToDownload: latestFromBranch # default is 'latest'
|
||||
definition: ${{ parameters.pipelineId }}
|
||||
branchName: refs/heads/${{ parameters.branchName }}
|
||||
allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
${{ else }}:
|
||||
buildType: 'current'
|
||||
itemPattern: '**/${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
${{ if eq(parameters.componentName, 'aomp') }}:
|
||||
buildVersionToDownload: latest # aomp trigger lives in ROCm/ROCm, so cannot use ROCm/aomp branch names
|
||||
${{ else }}:
|
||||
buildVersionToDownload: latestFromBranch
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract ${{ parameters.componentName }}
|
||||
inputs:
|
||||
|
||||
@@ -3,15 +3,21 @@
|
||||
# publish can be toggled off for jobs that produce multiple tarballs
|
||||
# for those cases, only publish the last call which puts all the tarballs in one container folder
|
||||
parameters:
|
||||
- name: artifactName
|
||||
- name: componentName
|
||||
type: string
|
||||
default: 'drop'
|
||||
- name: publish
|
||||
type: boolean
|
||||
default: true
|
||||
default: $(Build.DefinitionName)
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
- name: artifactName
|
||||
type: string
|
||||
default: drop
|
||||
- name: publish
|
||||
type: boolean
|
||||
default: true
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
|
||||
steps:
|
||||
- task: ArchiveFiles@2
|
||||
@@ -20,7 +26,7 @@ steps:
|
||||
includeRootFolder: false
|
||||
archiveType: 'tar'
|
||||
tarCompression: 'gz'
|
||||
archiveFile: '$(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.tar.gz'
|
||||
archiveFile: '$(Build.ArtifactStagingDirectory)/${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt).tar.gz'
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Cleanup Staging Area'
|
||||
inputs:
|
||||
@@ -32,7 +38,7 @@ steps:
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: echo "$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.tar.gz" >> pipelineArtifacts.txt
|
||||
script: echo "${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt).tar.gz" >> pipelineArtifacts.txt
|
||||
# then publish it
|
||||
- ${{ if parameters.publish }}:
|
||||
- task: PublishPipelineArtifact@1
|
||||
@@ -40,4 +46,5 @@ steps:
|
||||
displayName: '${{ parameters.artifactName }} Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
artifactName: ${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt)
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)'
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
parameters:
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
- name: componentName
|
||||
type: string
|
||||
default: ''
|
||||
@@ -20,17 +23,23 @@ steps:
|
||||
displayName: '${{ parameters.componentName }} configure flags'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ./configure --prefix=${{ parameters.installDir }} ${{ parameters.configureFlags }}
|
||||
workingDirectory: ${{ parameters.buildDir }}
|
||||
script: |
|
||||
${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
./configure --prefix=${{ parameters.installDir }} ${{ parameters.configureFlags }}
|
||||
- task: Bash@3
|
||||
displayName: '${{ parameters.componentName }} make'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ${{ parameters.makeCallPrefix }} make -j$(nproc)
|
||||
workingDirectory: ${{ parameters.buildDir }}
|
||||
script: |
|
||||
${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
${{ parameters.makeCallPrefix }} make -j$(nproc)
|
||||
- task: Bash@3
|
||||
displayName: '${{ parameters.componentName }} make install'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: make install
|
||||
workingDirectory: ${{ parameters.buildDir }}
|
||||
script: |
|
||||
${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
make install
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
parameters:
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
- name: componentName
|
||||
type: string
|
||||
default: ''
|
||||
- name: extraBuildFlags
|
||||
type: string
|
||||
default: ''
|
||||
- name: extraCxxFlags
|
||||
type: string
|
||||
default: ''
|
||||
- name: multithreadFlag
|
||||
type: string
|
||||
default: ''
|
||||
- name: cmakeBuildDir
|
||||
type: string
|
||||
default: 'build'
|
||||
default: $(Agent.BuildDirectory)/s/build
|
||||
- name: cmakeSourceDir
|
||||
type: string
|
||||
default: '..'
|
||||
default: $(Agent.BuildDirectory)/s
|
||||
- name: customBuildTarget
|
||||
type: string
|
||||
default: ''
|
||||
@@ -32,41 +38,81 @@ parameters:
|
||||
- name: installEnabled
|
||||
type: boolean
|
||||
default: true
|
||||
# for jobs that rebuild during install step and use ninja
|
||||
# set to true to save time, only applies for almalinux8
|
||||
- name: consolidateBuildAndInstall
|
||||
type: boolean
|
||||
default: false
|
||||
- name: printDiskSpace
|
||||
type: boolean
|
||||
default: true
|
||||
# todo: make this control cxx and c compiler flags
|
||||
- name: useAmdclang
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
# for cmake calls, set env variables for AlmaLinux 8
|
||||
# to simulate running source /opt/rh/gcc-toolset-14/enable for the session
|
||||
|
||||
steps:
|
||||
# create workingDirectory if it does not exist and change into it
|
||||
# call cmake from within that directory using $cmakeArgs as its parameters
|
||||
- task: CMake@1
|
||||
displayName: '${{parameters.componentName }} CMake Flags'
|
||||
${{ if eq(parameters.os, 'almalinux8')}}:
|
||||
env:
|
||||
PATH: "/opt/rh/gcc-toolset-14/root/usr/bin:$(PATH)"
|
||||
MANPATH: "/opt/rh/gcc-toolset-14/root/usr/share/man:$(MANPATH)"
|
||||
INFOPATH: "/opt/rh/gcc-toolset-14/root/usr/share/info:$(INFOPATH)"
|
||||
PCP_DIR: "/opt/rh/gcc-toolset-14/root"
|
||||
LD_LIBRARY_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64:/opt/rh/gcc-toolset-14/root/usr/lib:$(LD_LIBRARY_PATH)"
|
||||
PKG_CONFIG_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:$(PKG_CONFIG_PATH)"
|
||||
inputs:
|
||||
workingDirectory: ${{ parameters.cmakeBuildDir }}
|
||||
${{ if eq(parameters.customInstallPath, true) }}:
|
||||
cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ${{ parameters.cmakeSourceDir }}
|
||||
${{ else }}:
|
||||
cmakeArgs: ${{ parameters.extraBuildFlags }} ..
|
||||
cmakeArgs: >-
|
||||
${{ iif(parameters.customInstallPath, join('', format('-DCMAKE_INSTALL_PREFIX={0}', parameters.installDir)), '') }}
|
||||
${{ iif(eq(parameters.os, 'almalinux8'), '-DCMAKE_SHARED_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/lib64 -L/opt/rh/gcc-toolset-14/root/usr/lib/gcc/x86_64-redhat-linux/14/"', '') }}
|
||||
${{ iif(eq(parameters.os, 'almalinux8'), '-DCMAKE_EXE_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/lib64 -L/opt/rh/gcc-toolset-14/root/usr/lib/gcc/x86_64-redhat-linux/14/"', '') }}
|
||||
-DCMAKE_CXX_FLAGS="${{ parameters.extraCxxFlags }} ${{ iif(and(eq(parameters.os, 'almalinux8'), parameters.useAmdclang), '--gcc-toolchain=/opt/rh/gcc-toolset-14/root', '') }}"
|
||||
${{ parameters.extraBuildFlags }}
|
||||
${{ parameters.cmakeSourceDir }}
|
||||
- ${{ if parameters.printDiskSpace }}:
|
||||
- script: df -h
|
||||
displayName: Disk space before build
|
||||
# equivalent to running make $cmakeTargetDir from $cmakeBuildDir
|
||||
# i.e., cd $cmakeBuildDir; make $cmakeTargetDir
|
||||
- task: CMake@1
|
||||
displayName: '${{parameters.componentName }} Build'
|
||||
${{ if and( eq(parameters.os, 'almalinux8'), eq(parameters.consolidateBuildAndInstall , true)) }}:
|
||||
displayName: '${{ parameters.componentName }} CMake Build and Install'
|
||||
${{ else }}:
|
||||
displayName: '${{ parameters.componentName }} CMake Build'
|
||||
${{ if eq(parameters.os, 'almalinux8')}}:
|
||||
env:
|
||||
PATH: "/opt/rh/gcc-toolset-14/root/usr/bin:$(PATH)"
|
||||
MANPATH: "/opt/rh/gcc-toolset-14/root/usr/share/man:$(MANPATH)"
|
||||
INFOPATH: "/opt/rh/gcc-toolset-14/root/usr/share/info:$(INFOPATH)"
|
||||
PCP_DIR: "/opt/rh/gcc-toolset-14/root"
|
||||
LD_LIBRARY_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64:/opt/rh/gcc-toolset-14/root/usr/lib:$(LD_LIBRARY_PATH)"
|
||||
PKG_CONFIG_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:$(PKG_CONFIG_PATH)"
|
||||
inputs:
|
||||
workingDirectory: ${{ parameters.cmakeBuildDir }}
|
||||
${{ if eq(parameters.customBuildTarget, '') }}:
|
||||
cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} ${{ parameters.multithreadFlag }}'
|
||||
${{ else }}:
|
||||
cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} --target ${{ parameters.customBuildTarget }} ${{ parameters.multithreadFlag }}'
|
||||
retryCountOnTaskFailure: 10
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
cmakeArgs: >-
|
||||
--build ${{ parameters.cmakeTargetDir }}
|
||||
${{ iif(and(eq(parameters.consolidateBuildAndInstall, true), ne(parameters.cmakeTarget, '')), format('--target {0}', parameters.cmakeTarget), '') }}
|
||||
${{ iif(and(ne(parameters.customBuildTarget, ''), ne(parameters.consolidateBuildAndInstall, true)), format('--target {0}', parameters.customBuildTarget), '') }}
|
||||
${{ parameters.multithreadFlag }}
|
||||
${{ if ne(parameters.os, 'almalinux8') }}:
|
||||
cmakeArgs: >-
|
||||
--build ${{ parameters.cmakeTargetDir }}
|
||||
${{ iif(ne(parameters.customBuildTarget, ''), format('--target {0}', parameters.customBuildTarget), '') }}
|
||||
${{ parameters.multithreadFlag }}
|
||||
- ${{ if parameters.printDiskSpace }}:
|
||||
- script: df -h
|
||||
displayName: Disk space after build
|
||||
# equivalent to running make $cmakeTarget from $cmakeBuildDir
|
||||
# e.g., make install
|
||||
- ${{ if eq(parameters.installEnabled, true) }}:
|
||||
- ${{ if and(eq(parameters.installEnabled, true), or(ne(parameters.os, 'almalinux8'), eq(parameters.consolidateBuildAndInstall, false))) }}:
|
||||
- task: CMake@1
|
||||
displayName: '${{parameters.componentName }} ${{ parameters.cmakeTarget }}'
|
||||
inputs:
|
||||
|
||||
@@ -4,6 +4,9 @@ parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
# submodule download behaviour
|
||||
# change to 'recursive' for repos with submodules
|
||||
- name: submoduleBehaviour
|
||||
@@ -15,3 +18,13 @@ steps:
|
||||
clean: true
|
||||
submodules: ${{ parameters.submoduleBehaviour }}
|
||||
retryCountOnTaskFailure: 3
|
||||
fetchFilter: blob:none
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }}
|
||||
path: sparse
|
||||
- ${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
- task: Bash@3
|
||||
displayName: Symlink sparse checkout
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ln -s $(Agent.BuildDirectory)/sparse/${{ parameters.sparseCheckoutDir }} $(Agent.BuildDirectory)/s
|
||||
|
||||
42
.azuredevops/templates/steps/dependencies-apt.yml
Normal file
42
.azuredevops/templates/steps/dependencies-apt.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default: []
|
||||
- name: registerROCmPackages
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos (apt)'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get update'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get fix'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
|
||||
- ${{ if gt(length(parameters.aptPackages), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get install ...'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
|
||||
@@ -1,25 +1,44 @@
|
||||
parameters:
|
||||
- name: os
|
||||
type: string
|
||||
default: ubuntu2204
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Get aqlprofile package name
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1)
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
- task: Bash@3
|
||||
displayName: 'Download aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$(packageName)
|
||||
- task: Bash@3
|
||||
displayName: 'Extract aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
dpkg-deb -R $(packageName) hsa-amd-aqlprofile
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
dpkg-deb -R $(packageName) hsa-amd-aqlprofile
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
sudo dnf -y install rpm-build cpio
|
||||
rpm2cpio $(packageName) | (cd hsa-amd-aqlprofile && cpio -idmv)
|
||||
- task: Bash@3
|
||||
displayName: 'Copy aqlprofile files'
|
||||
inputs:
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download Boost
|
||||
inputs:
|
||||
buildType: specific
|
||||
project: ROCm-CI
|
||||
definition: $(BOOST_DEPENDENCY_PIPELINE_ID)
|
||||
targetPath: $(Pipeline.Workspace)/d
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract Boost
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: $(Agent.BuildDirectory)/boost
|
||||
cleanDestinationFolder: true
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Cleanup Compressed Boost
|
||||
inputs:
|
||||
SourceFolder: $(Pipeline.Workspace)/d
|
||||
Contents: '**/*.tar.gz'
|
||||
RemoveDotFiles: true
|
||||
- task: Bash@3
|
||||
displayName: 'List Boost files'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ls -1R $(Agent.BuildDirectory)/boost
|
||||
- task: Bash@3
|
||||
displayName: 'Link Boost shared libraries'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo $(Agent.BuildDirectory)/boost/lib | sudo tee /etc/ld.so.conf.d/boost.conf
|
||||
sudo cat /etc/ld.so.conf.d/boost.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
@@ -1,10 +1,23 @@
|
||||
# replace cmake from apt install with newest version using snap install
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: update cmake
|
||||
displayName: Install CMake 3.31
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo apt purge cmake -y
|
||||
sudo snap install cmake --classic --channel=3.31/stable
|
||||
hash -r
|
||||
CMAKE_VERSION=3.31.0
|
||||
CMAKE_ROOT="$(Pipeline.Workspace)/cmake"
|
||||
|
||||
echo "Downloading CMake $CMAKE_VERSION..."
|
||||
curl -fsSL -o cmake.tar.gz https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz
|
||||
|
||||
echo "Extracting to $CMAKE_ROOT..."
|
||||
sudo mkdir -p $CMAKE_ROOT
|
||||
sudo tar --strip-components=1 -xz -C $CMAKE_ROOT -f cmake.tar.gz
|
||||
|
||||
echo "##vso[task.prependpath]$CMAKE_ROOT/bin"
|
||||
- task: Bash@3
|
||||
displayName: cmake --version
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
cmake --version
|
||||
|
||||
157
.azuredevops/templates/steps/dependencies-dnf.yml
Normal file
157
.azuredevops/templates/steps/dependencies-dnf.yml
Normal file
@@ -0,0 +1,157 @@
|
||||
parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default: []
|
||||
- name: registerROCmPackages
|
||||
type: boolean
|
||||
default: false
|
||||
# As par of installing gcc toolset and python,
|
||||
# the environment will install this base set of dnf packages.
|
||||
- name: basePackages
|
||||
type: object
|
||||
default:
|
||||
- epel-release
|
||||
- gcc-toolset-14
|
||||
- gcc-toolset-14-libatomic-devel
|
||||
- git
|
||||
- jq
|
||||
- numactl
|
||||
- python3.11
|
||||
- python3.11-pip
|
||||
- vim-common
|
||||
- wget
|
||||
# Instead of defining multiple arrays of packages per component,
|
||||
# we define a map of apt package names to dnf package names.
|
||||
- name: aptToDnfMap
|
||||
type: object
|
||||
default:
|
||||
bison: bison
|
||||
ccache: ccache
|
||||
cmake: cmake
|
||||
cuda-toolkit-12-9: cuda-compiler-12-9 cuda-toolkit-12-9
|
||||
libcudnn9-dev-cuda-12: libcudnn9-cuda-12
|
||||
dejagnu: dejagnu
|
||||
doxygen: doxygen
|
||||
# note: doxygen-doc is not available in dnf
|
||||
# libavcodec-dev, libavformat-dev, libavutil-dev come with ffmpeg-devel
|
||||
ffmpeg: ffmpeg ffmpeg-devel
|
||||
flex: flex
|
||||
# note: g++ is installed by default with gcc-toolset-14
|
||||
# note: gawk is already installed
|
||||
# note: gcc-toolset-14-gfortran is installed by default with gcc-toolset-14
|
||||
# note: git is in the base packages list
|
||||
graphviz: graphviz
|
||||
libbabeltrace-dev: libbabeltrace-devel
|
||||
libbison-dev: bison-devel
|
||||
libboost-program-options-dev: boost-devel
|
||||
# note: libdrm-amdgpu1 is not available in dnf
|
||||
libdrm-dev: libdrm-devel
|
||||
libdrm-amdgpu-dev: libdrm-amdgpu-devel
|
||||
libdw-dev: elfutils-devel
|
||||
libelf-dev: elfutils-libelf-devel
|
||||
libexpat-dev: expat-devel
|
||||
libffi-dev: libffi-devel
|
||||
libfftw3-dev: fftw-devel
|
||||
libgmp-dev: gmp-devel
|
||||
liblzma-dev: xz-devel
|
||||
libmpfr-dev: mpfr-devel
|
||||
libmsgpack-dev: msgpack-devel
|
||||
libncurses5-dev: ncurses-devel
|
||||
libnuma-dev: numactl-devel
|
||||
libopenmpi-dev: openmpi-devel
|
||||
libpci-dev: libpciaccess-devel
|
||||
libssl-dev: openssl-devel
|
||||
# note: libstdc++-devel is in the base packages list
|
||||
libsystemd-dev: systemd-devel
|
||||
libtool: libtool
|
||||
# note: libudev-dev is part of systemd-devel
|
||||
libva-amdgpu-dev: libva-amdgpu-devel
|
||||
mesa-amdgpu-va-drivers: mesa-amdgpu-va-drivers
|
||||
mesa-common-dev: mesa-libGL-devel
|
||||
ncurses-dev: ncurses-devel
|
||||
# note: llvm needs ninja-build version newer than what dnf provides
|
||||
ocl-icd-libopencl1: ocl-icd
|
||||
ocl-icd-opencl-dev: ocl-icd-devel
|
||||
opencl-headers: opencl-headers
|
||||
parallel: parallel
|
||||
pkg-config: pkgconf-pkg-config
|
||||
# note: python3 is the default python in AlmaLinux 8
|
||||
python3-dev: python3.11-devel
|
||||
# note: python3.11-pip is already installed when updating to python 3.11
|
||||
# note: python3.11-setuptools is already installed when updating to python 3.11
|
||||
texinfo: texinfo
|
||||
zlib1g-dev: zlib-devel
|
||||
|
||||
steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos (dnf)'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo rpm --import https://repo.radeon.com/rocm/rocm.gpg.key
|
||||
echo '[amdgpu]' | sudo tee /etc/yum.repos.d/amdgpu.repo > /dev/null
|
||||
echo "name=amdgpu" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
|
||||
echo "baseurl=https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/rhel/8.10/main/x86_64/" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
|
||||
echo "enabled=1" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
|
||||
echo "gpgcheck=1" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
|
||||
echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
|
||||
echo '[rocm]' | sudo tee /etc/yum.repos.d/rocm.repo > /dev/null
|
||||
echo "name=ROCm$(REPO_RADEON_VERSION)" | sudo tee --append /etc/yum.repos.d/rocm.repo
|
||||
echo "baseurl=https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/" | sudo tee --append /etc/yum.repos.d/rocm.repo
|
||||
echo "enabled=1" | sudo tee --append /etc/yum.repos.d/rocm.repo
|
||||
echo "gpgcheck=1" | sudo tee --append /etc/yum.repos.d/rocm.repo
|
||||
echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" | sudo tee --append /etc/yum.repos.d/rocm.repo
|
||||
sudo dnf clean all
|
||||
sudo dnf makecache
|
||||
- task: Bash@3
|
||||
displayName: 'Install base dnf packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo dnf config-manager --set-enabled powertools
|
||||
# rpm fusion free repo for some dependencies
|
||||
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
|
||||
sudo dnf -y install ${{ join(' ', parameters.basePackages) }}
|
||||
- task: Bash@3
|
||||
displayName: 'Check gcc environment'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "=== Versions and sanity checks ==="
|
||||
gcc --version
|
||||
g++ --version
|
||||
gcc -print-file-name=libstdc++.so
|
||||
g++ -print-file-name=libstdc++.so
|
||||
- task: Bash@3
|
||||
displayName: 'Set python 3.11 as default'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo dnf -y module disable python36
|
||||
sudo rm -f /usr/local/bin/python3.12 /usr/local/bin/python3.13 /usr/local/bin/python3.14
|
||||
sudo alternatives --set python /usr/bin/python3.11
|
||||
sudo alternatives --set python3 /usr/bin/python3.11
|
||||
python3 --version
|
||||
python3 -m pip install --upgrade pip setuptools wheel
|
||||
- ${{ each pkg in parameters.aptPackages }}:
|
||||
# note: llvm needs ninja-build version newer than what dnf provides
|
||||
- ${{ if eq(pkg, 'ninja-build') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Install ninja 1.11.1'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip
|
||||
sudo dnf -y install unzip
|
||||
unzip ninja-linux.zip
|
||||
sudo mv ninja /usr/local/bin/ninja
|
||||
sudo chmod +x /usr/local/bin/ninja
|
||||
echo "##vso[task.prependpath]/usr/local/bin"
|
||||
- ${{ if ne(parameters.aptToDnfMap[pkg], '') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'dnf install ${{ parameters.aptToDnfMap[pkg] }}'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo dnf -y install ${{ parameters.aptToDnfMap[pkg] }}
|
||||
@@ -9,56 +9,24 @@ parameters:
|
||||
- name: registerROCmPackages
|
||||
type: boolean
|
||||
default: false
|
||||
- name: packageManager
|
||||
type: string
|
||||
default: apt
|
||||
|
||||
steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
# firefox takes time to upgrade and is not needed for CI workloads, hold version
|
||||
- task: Bash@3
|
||||
continueOnError: true
|
||||
displayName: 'sudo apt-mark hold firefox'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo apt-mark hold firefox
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get update'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get upgrade'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes upgrade
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get fix'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
|
||||
- ${{ if gt(length(parameters.aptPackages), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get install ...'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
|
||||
- ${{ if eq(parameters.packageManager, 'apt') }}:
|
||||
- template: dependencies-apt.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: ${{ parameters.registerROCmPackages }}
|
||||
- ${{ if eq(parameters.packageManager, 'dnf') }}:
|
||||
- template: dependencies-dnf.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: ${{ parameters.registerROCmPackages }}
|
||||
- ${{ if gt(length(parameters.pipModules), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'pip install ...'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}
|
||||
script: python3 -m pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}
|
||||
|
||||
@@ -13,6 +13,9 @@ parameters:
|
||||
- name: dependencyList
|
||||
type: object
|
||||
default: []
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
@@ -36,6 +39,10 @@ parameters:
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
# monorepo related parameters
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
- name: componentVarList
|
||||
type: object
|
||||
@@ -103,7 +110,7 @@ parameters:
|
||||
hipCUB:
|
||||
pipelineId: $(HIPCUB_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
hipFFT:
|
||||
pipelineId: $(HIPFFT_PIPELINE_ID)
|
||||
@@ -123,7 +130,7 @@ parameters:
|
||||
hipRAND:
|
||||
pipelineId: $(HIPRAND_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
hipSOLVER:
|
||||
pipelineId: $(HIPSOLVER_PIPELINE_ID)
|
||||
@@ -258,7 +265,7 @@ parameters:
|
||||
rocPRIM:
|
||||
pipelineId: $(ROCPRIM_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocprofiler:
|
||||
pipelineId: $(ROCPROFILER_PIPELINE_ID)
|
||||
@@ -298,7 +305,7 @@ parameters:
|
||||
rocRAND:
|
||||
pipelineId: $(ROCRAND_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocr_debug_agent:
|
||||
pipelineId: $(ROCR_DEBUG_AGENT_PIPELINE_ID)
|
||||
@@ -323,7 +330,7 @@ parameters:
|
||||
rocThrust:
|
||||
pipelineId: $(ROCTHRUST_PIPELINE_ID)
|
||||
stagingBranch: develop
|
||||
mainlineBranch: mainline
|
||||
mainlineBranch: develop
|
||||
hasGpuTarget: true
|
||||
roctracer:
|
||||
pipelineId: $(ROCTRACER_PIPELINE_ID)
|
||||
@@ -361,7 +368,7 @@ steps:
|
||||
pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
|
||||
fileFilter: "${{ split(dependency, ':')[1] }}*${{ parameters.gpuTarget }}"
|
||||
fileFilter: "${{ split(dependency, ':')[1] }}*_${{ parameters.os }}_${{ parameters.gpuTarget }}"
|
||||
# dependencySource = staging
|
||||
${{ if eq(parameters.dependencySource, 'staging')}}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
|
||||
@@ -384,6 +391,14 @@ steps:
|
||||
${{ else }}:
|
||||
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
|
||||
# no colon (:) found in this item in the list
|
||||
- ${{ elseif containsValue(split(parameters.downstreamAggregateNames, '+'), dependency) }}:
|
||||
- template: local-artifact-download.yml
|
||||
parameters:
|
||||
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
|
||||
gpuTarget: ${{ parameters.gpuTarget }}
|
||||
preTargetFilter: ${{ dependency }}
|
||||
os: ${{ parameters.os }}
|
||||
buildType: current
|
||||
- ${{ else }}:
|
||||
- template: artifact-download.yml
|
||||
parameters:
|
||||
@@ -391,7 +406,9 @@ steps:
|
||||
pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
|
||||
fileFilter: ${{ parameters.gpuTarget }}
|
||||
fileFilter: ${{ parameters.os }}_${{ parameters.gpuTarget }}
|
||||
${{ else }}:
|
||||
fileFilter: ${{ parameters.os }}
|
||||
# dependencySource = staging
|
||||
${{ if eq(parameters.dependencySource, 'staging')}}:
|
||||
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
|
||||
@@ -419,14 +436,16 @@ steps:
|
||||
displayName: Symlink from rocm/llvm to rocm/lib/llvm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
script: |
|
||||
sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib
|
||||
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- task: Bash@3
|
||||
displayName: Symlink executables from rocm/llvm/bin to rocm/bin
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
for file in amdclang amdclang++ amdclang-cl amdclang-cpp amdflang amdlld aompcc mygpu mycpu offload-arch; do
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
|
||||
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
|
||||
done
|
||||
# dlopen calls within a ctest or pytest sequence runs into issues when shared library symlink convention is not followed
|
||||
# the convention is as follows:
|
||||
@@ -463,7 +482,7 @@ steps:
|
||||
displayName: 'List downloaded ROCm files'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ls -1R $(Agent.BuildDirectory)/rocm
|
||||
script: ls -la1R $(Agent.BuildDirectory)/rocm
|
||||
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Link ROCm shared libraries'
|
||||
@@ -471,8 +490,10 @@ steps:
|
||||
targetType: inline
|
||||
# OS ignores if the ROCm lib folder shows up more than once
|
||||
script: |
|
||||
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
|
||||
53
.azuredevops/templates/steps/dependencies-vendor.yml
Normal file
53
.azuredevops/templates/steps/dependencies-vendor.yml
Normal file
@@ -0,0 +1,53 @@
|
||||
parameters:
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
- name: dependencyList
|
||||
type: object
|
||||
- name: pipelineIdList
|
||||
type: object
|
||||
default:
|
||||
boost: 250
|
||||
grpc: 72
|
||||
gtest: 73
|
||||
half560: 68
|
||||
lapack: 69
|
||||
|
||||
steps:
|
||||
- ${{ each dependency in parameters.dependencyList }}:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ dependency }}
|
||||
inputs:
|
||||
project: ROCm-CI
|
||||
buildType: specific
|
||||
targetPath: $(Pipeline.Workspace)/d
|
||||
definition: ${{ parameters.pipelineIdList[dependency] }}
|
||||
itemPattern: '**/*${{ parameters.os }}*'
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract ${{ dependency }}
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: $(Agent.BuildDirectory)/vendor
|
||||
cleanDestinationFolder: true
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Clean up ${{ dependency }}
|
||||
inputs:
|
||||
SourceFolder: $(Pipeline.Workspace)/d
|
||||
Contents: '**/*.tar.gz'
|
||||
RemoveDotFiles: true
|
||||
- task: Bash@3
|
||||
displayName: List vendored files
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ls -la1R $(Agent.BuildDirectory)/vendor
|
||||
- task: Bash@3
|
||||
displayName: Link vendored shared libraries
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo $(Agent.BuildDirectory)/vendor/lib | sudo tee -a /etc/ld.so.conf.d/vendor.conf
|
||||
echo $(Agent.BuildDirectory)/vendor/lib64 | sudo tee -a /etc/ld.so.conf.d/vendor.conf
|
||||
sudo cat /etc/ld.so.conf.d/vendor.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
@@ -106,6 +106,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- gfx90a
|
||||
- gfx942
|
||||
|
||||
steps:
|
||||
# these steps should only be run if there was a failure or warning
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
# It can be overridden to download any artifact from any pipeline, given the appropriate build/pipeline IDs
|
||||
|
||||
parameters:
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
@@ -29,25 +32,27 @@ parameters:
|
||||
|
||||
steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Build'
|
||||
displayName: Download ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
|
||||
inputs:
|
||||
${{ if eq(parameters.buildType, 'specific') }}:
|
||||
buildType: specific
|
||||
buildVersionToDownload: specific
|
||||
project: ROCm-CI
|
||||
definition: ${{ parameters.definitionId }}
|
||||
buildId: ${{ parameters.buildId }}
|
||||
itemPattern: '**/*${{ parameters.preTargetFilter }}*${{ parameters.gpuTarget }}*${{ parameters.postTargetFilter }}*'
|
||||
${{ if ne(parameters.definitionId, 0) }}:
|
||||
definition: ${{ parameters.definitionId }}
|
||||
${{ if ne(parameters.buildId, 0) }}:
|
||||
buildId: ${{ parameters.buildId }}
|
||||
itemPattern: '**/*${{ parameters.preTargetFilter }}*${{ parameters.os }}_${{ parameters.gpuTarget }}*${{ parameters.postTargetFilter }}*'
|
||||
targetPath: $(Pipeline.Workspace)/d
|
||||
- task: ExtractFiles@1
|
||||
displayName: 'Extract Pipeline Build'
|
||||
displayName: Extract ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: '$(Agent.BuildDirectory)/rocm'
|
||||
cleanDestinationFolder: false
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Clean up Compressed Pipeline Build'
|
||||
displayName: Clean up ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
|
||||
inputs:
|
||||
SourceFolder: '$(Pipeline.Workspace)/d'
|
||||
Contents: '/**/*.tar.xz'
|
||||
|
||||
@@ -1,10 +1,19 @@
|
||||
parameters:
|
||||
- name: artifactName
|
||||
- name: componentName
|
||||
type: string
|
||||
default: 'drop'
|
||||
default: $(Build.DefinitionName)
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
- name: artifactName
|
||||
type: string
|
||||
default: drop
|
||||
- name: os
|
||||
type: string
|
||||
default: 'ubuntu2204'
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
@@ -25,8 +34,9 @@ steps:
|
||||
|
||||
IS_TAG_BUILD=$(jq 'has("release_repo")' resources.repositories)
|
||||
IS_AOMP_BUILD=$(jq 'has("aomp_repo")' resources.repositories)
|
||||
IS_MATHLIBS_BUILD=$(jq 'has("libraries_repo")' resources.repositories)
|
||||
|
||||
if [ "$IS_TAG_BUILD" = "true" ] || [ "$IS_AOMP_BUILD" = "true" ]; then
|
||||
if [ "$IS_TAG_BUILD" = "true" ] || [ "$IS_AOMP_BUILD" = "true" ] || [ "$IS_MATHLIBS_BUILD" = "true" ]; then
|
||||
exclude_keys=("pipelines_repo" "self") # Triggered by a file under ROCm/ROCm
|
||||
else
|
||||
exclude_keys=("pipelines_repo") # Triggered by a file under a component repo
|
||||
@@ -45,6 +55,7 @@ steps:
|
||||
buildId: "$(Build.BuildId)",
|
||||
repoId: $entry.value.id,
|
||||
repoName: $entry.value.name,
|
||||
repoSparse: "${{ parameters.sparseCheckoutDir }}",
|
||||
repoRef: $entry.value.ref,
|
||||
repoUrl: $entry.value.url,
|
||||
repoVersion: $entry.value.version
|
||||
@@ -55,7 +66,7 @@ steps:
|
||||
)
|
||||
' resources.repositories)
|
||||
|
||||
manifest_json=$(Build.ArtifactStagingDirectory)/manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json
|
||||
manifest_json=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json
|
||||
|
||||
dependencies=()
|
||||
for manifest_file in $(Pipeline.Workspace)/d/**/manifest_*.json; do
|
||||
@@ -81,6 +92,7 @@ steps:
|
||||
"<tr><td>" + .buildNumber + "</td>" +
|
||||
"<td><a href=\"https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=" + .buildId + "\">" + .buildId + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "\">" + .repoName + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "/" + .repoSparse + "\">" + .repoSparse + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "\">" + .repoRef + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "/commit/" + .repoVersion + "\">" + .repoVersion + "</a></td></tr>"
|
||||
')
|
||||
@@ -93,6 +105,7 @@ steps:
|
||||
"<tr><td>" + .buildNumber + "</td>" +
|
||||
"<td><a href=\"https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=" + .buildId + "\">" + .buildId + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "\">" + .repoName + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "/" + .repoSparse + "\">" + .repoSparse + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "\">" + .repoRef + "</a></td>" +
|
||||
"<td><a href=\"" + .repoUrl + "/commit/" + .repoVersion + "\">" + .repoVersion + "</a></td></tr>"
|
||||
')
|
||||
@@ -107,7 +120,7 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
manifest_html=$(Build.ArtifactStagingDirectory)/manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
|
||||
manifest_html=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
|
||||
cat <<EOF > $manifest_html
|
||||
<html>
|
||||
<h1>Manifest</h1>
|
||||
@@ -117,6 +130,7 @@ steps:
|
||||
<th>Build Number</th>
|
||||
<th>Build ID</th>
|
||||
<th>Repo Name</th>
|
||||
<th>Repo Sparse</th>
|
||||
<th>Repo Ref</th>
|
||||
<th>Repo Version</th>
|
||||
</tr>
|
||||
@@ -128,6 +142,7 @@ steps:
|
||||
<th>Build Number</th>
|
||||
<th>Build ID</th>
|
||||
<th>Repo Name</th>
|
||||
<th>Repo Sparse</th>
|
||||
<th>Repo Ref</th>
|
||||
<th>Repo Version</th>
|
||||
</tr>
|
||||
@@ -148,7 +163,7 @@ steps:
|
||||
continueOnError: true
|
||||
inputs:
|
||||
tabName: Manifest
|
||||
reportDir: $(Build.ArtifactStagingDirectory)/manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
|
||||
reportDir: $(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
|
||||
- task: Bash@3
|
||||
displayName: Save manifest artifact file name
|
||||
condition: always()
|
||||
@@ -157,5 +172,5 @@ steps:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
|
||||
echo "manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt
|
||||
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
|
||||
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt
|
||||
|
||||
@@ -25,7 +25,7 @@ steps:
|
||||
echo "Fetching CK build ID for commit $CK_COMMIT"
|
||||
CK_CHECKS_URL="$GH_API/composable_kernel/commits/${CK_COMMIT}/check-runs"
|
||||
CK_BUILD_ID=$(curl -s $CK_CHECKS_URL | \
|
||||
jq '.check_runs[] | select(.name == "composable_kernel" and .app.slug == "azure-pipelines") | .details_url' | \
|
||||
jq '.check_runs[] | select(.name == "composable_kernel" and .app.slug == "azure-pipelines" and .conclusion == "success") | .details_url' | \
|
||||
tr -d '"' | grep -oP 'buildId=\K\d+')
|
||||
|
||||
# If none found, use latest successful CK build instead
|
||||
|
||||
@@ -3,10 +3,27 @@
|
||||
# also display installed components and packages
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: List apt packages
|
||||
displayName: OS Version
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: apt list --installed
|
||||
script: cat /etc/os-release
|
||||
- task: Bash@3
|
||||
displayName: List installed packages (apt, dnf, or yum)
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
if command -v apt >/dev/null 2>&1; then
|
||||
echo "Listing installed packages with apt:"
|
||||
apt list --installed
|
||||
elif command -v dnf >/dev/null 2>&1; then
|
||||
echo "Listing installed packages with dnf:"
|
||||
dnf list installed
|
||||
elif command -v yum >/dev/null 2>&1; then
|
||||
echo "Listing installed packages with yum:"
|
||||
yum list installed
|
||||
else
|
||||
echo "No supported package manager found (apt, dnf, yum)."
|
||||
fi
|
||||
- task: Bash@3
|
||||
displayName: Print Python version
|
||||
inputs:
|
||||
@@ -16,7 +33,7 @@ steps:
|
||||
displayName: List Python packages
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: pip list -v
|
||||
script: python3 -m pip list -v
|
||||
# The "Azure Pipelines" agents install CMake in multiple ways, including a standalone install into /usr/local/bin:
|
||||
# https://github.com/actions/runner-images/blob/6d939a3ab352a54a021dd67b071577287b6f14a5/images/ubuntu/scripts/build/install-cmake.sh#L27
|
||||
# This standalone CMake does not have a fixed version, and is not the same version as the one installed by the package manager
|
||||
|
||||
@@ -2,21 +2,27 @@ parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: ''
|
||||
- name: os
|
||||
type: string
|
||||
default: ubuntu2204
|
||||
- name: testDir
|
||||
type: string
|
||||
default: 'build'
|
||||
default: build
|
||||
- name: testExecutable
|
||||
type: string
|
||||
default: 'ctest'
|
||||
default: ctest
|
||||
- name: testParameters
|
||||
type: string
|
||||
default: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
default: --output-on-failure --force-new-ctest-process --output-junit test_output.xml
|
||||
- name: extraTestParameters
|
||||
type: string
|
||||
default: ''
|
||||
- name: testOutputFile
|
||||
type: string
|
||||
default: test_output.xml
|
||||
- name: testOutputFormat
|
||||
type: string
|
||||
default: 'JUnit'
|
||||
default: JUnit
|
||||
values:
|
||||
- JUnit
|
||||
- NUnit
|
||||
@@ -26,26 +32,28 @@ parameters:
|
||||
- name: testPublishResults
|
||||
type: boolean
|
||||
default: true
|
||||
- name: allowPartiallySucceededBuilds
|
||||
- name: allowComponentTestFailure
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
- aomp
|
||||
- HIPIFY
|
||||
- MIVisionX
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-sdk
|
||||
- roctracer
|
||||
# the following do not use this template but allow test failures, included for completeness
|
||||
- aomp
|
||||
- ROCgdb
|
||||
|
||||
steps:
|
||||
# run test, continue on failure to publish results
|
||||
# and to publish build artifacts
|
||||
- task: Bash@3
|
||||
displayName: '${{ parameters.componentName }} Test'
|
||||
continueOnError: ${{ containsValue(parameters.allowPartiallySucceededBuilds, parameters.componentName) }}
|
||||
continueOnError: ${{ containsValue(parameters.allowComponentTestFailure, parameters.componentName) }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ${{ parameters.testExecutable }} ${{ parameters.testParameters }}
|
||||
script: |
|
||||
${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
|
||||
${{ parameters.testExecutable }} ${{ parameters.testParameters }} ${{ parameters.extraTestParameters }}
|
||||
workingDirectory: ${{ parameters.testDir }}
|
||||
- ${{ if parameters.testPublishResults }}:
|
||||
- task: PublishTestResults@2
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
variables:
|
||||
- name: RESOURCES_REPOSITORIES
|
||||
value: $[ convertToJson(resources.repositories) ]
|
||||
- name: CCACHE_DIR
|
||||
value: $(Pipeline.Workspace)/ccache
|
||||
- name: CI_ROOT_PATH
|
||||
value: /.azuredevops
|
||||
- name: CI_COMPONENT_PATH
|
||||
@@ -30,320 +32,136 @@ variables:
|
||||
- name: GFX90A_TEST_POOL
|
||||
value: gfx90a_test_pool
|
||||
- name: LATEST_RELEASE_VERSION
|
||||
value: 6.4.0
|
||||
value: 6.4.1
|
||||
- name: REPO_RADEON_VERSION
|
||||
value: 6.4
|
||||
value: 6.4.1
|
||||
- name: NEXT_RELEASE_VERSION
|
||||
value: 6.5.0
|
||||
value: 7.0.0
|
||||
- name: LATEST_RELEASE_TAG
|
||||
value: rocm-6.4.0
|
||||
value: rocm-6.4.1
|
||||
- name: DOCKER_SKIP_GFX
|
||||
value: gfx90a
|
||||
- name: AMDMIGRAPHX_GFX942_TEST_PIPELINE_ID
|
||||
value: 197
|
||||
- name: AMDMIGRAPHX_PIPELINE_ID
|
||||
value: 113
|
||||
- name: AMDMIGRAPHX_TAGGED_PIPELINE_ID
|
||||
value: 60
|
||||
- name: AMDSMI_PIPELINE_ID
|
||||
value: 99
|
||||
- name: AMDSMI_TAGGED_PIPELINE_ID
|
||||
value: 33
|
||||
- name: AOMP_EXTRAS_PIPELINE_ID
|
||||
value: 111
|
||||
- name: AOMP_EXTRAS_TAGGED_PIPELINE_ID
|
||||
value: 75
|
||||
- name: AOMP_PIPELINE_ID
|
||||
value: 115
|
||||
- name: AOMP_TAGGED_PIPELINE_ID
|
||||
value: 76
|
||||
- name: CCACHE_DIR
|
||||
value: $(Pipeline.Workspace)/ccache
|
||||
- name: CLR_PIPELINE_ID
|
||||
value: 145
|
||||
- name: CLR_TAGGED_PIPELINE_ID
|
||||
value: 71
|
||||
- name: COMPOSABLE_KERNEL_GFX942_TEST_PIPELINE_ID
|
||||
value: 179
|
||||
- name: COMPOSABLE_KERNEL_PIPELINE_ID
|
||||
value: 86
|
||||
- name: COMPOSABLE_KERNEL_TAGGED_PIPELINE_ID
|
||||
value: 38
|
||||
- name: FLANG_LEGACY_PIPELINE_ID
|
||||
value: 77
|
||||
- name: FLANG_LEGACY_TAGGED_PIPELINE_ID
|
||||
value: 77
|
||||
- name: HALF_PIPELINE_ID
|
||||
value: 101
|
||||
- name: HALF_TAGGED_PIPELINE_ID
|
||||
value: 11
|
||||
- name: HALF560_PIPELINE_ID
|
||||
value: 68
|
||||
- name: HALF560_BUILD_ID
|
||||
value: 621
|
||||
- name: HIP_PIPELINE_ID
|
||||
value: 93
|
||||
- name: HIP_TAGGED_PIPELINE_ID
|
||||
value: 31
|
||||
- name: HIP_TESTS_PIPELINE_ID
|
||||
value: 233
|
||||
- name: HIP_TESTS_TAGGED_PIPELINE_ID
|
||||
value: 220
|
||||
- name: HIPBLAS_COMMON_PIPELINE_ID
|
||||
value: 223
|
||||
- name: HIPBLAS_COMMON_TAGGED_PIPELINE_ID
|
||||
value: 224
|
||||
- name: HIPBLAS_GFX942_TEST_PIPELINE_ID
|
||||
value: 202
|
||||
- name: HIPBLAS_PIPELINE_ID
|
||||
value: 87
|
||||
- name: HIPBLAS_TAGGED_PIPELINE_ID
|
||||
value: 44
|
||||
- name: HIPBLASLT_GFX942_TEST_PIPELINE_ID
|
||||
value: 187
|
||||
- name: HIPBLASLT_PIPELINE_ID
|
||||
value: 112
|
||||
- name: HIPBLASLT_TAGGED_PIPELINE_ID
|
||||
value: 45
|
||||
- name: HIPCUB_GFX942_TEST_PIPELINE_ID
|
||||
value: 186
|
||||
- name: HIPCUB_PIPELINE_ID
|
||||
value: 97
|
||||
- name: HIPCUB_TAGGED_PIPELINE_ID
|
||||
value: 46
|
||||
- name: HIPFFT_GFX942_TEST_PIPELINE_ID
|
||||
value: 198
|
||||
value: 277
|
||||
- name: HIPFFT_PIPELINE_ID
|
||||
value: 121
|
||||
- name: HIPFFT_TAGGED_PIPELINE_ID
|
||||
value: 12
|
||||
- name: HIPFORT_PIPELINE_ID
|
||||
value: 102
|
||||
- name: HIPFORT_TAGGED_PIPELINE_ID
|
||||
value: 34
|
||||
- name: HIPIFY_PIPELINE_ID
|
||||
value: 92
|
||||
- name: HIPIFY_TAGGED_PIPELINE_ID
|
||||
value: 13
|
||||
- name: HIPRAND_GFX942_TEST_PIPELINE_ID
|
||||
value: 188
|
||||
- name: HIPRAND_PIPELINE_ID
|
||||
value: 90
|
||||
- name: HIPRAND_TAGGED_PIPELINE_ID
|
||||
value: 42
|
||||
- name: HIPSOLVER_GFX942_TEST_PIPELINE_ID
|
||||
value: 201
|
||||
value: 275
|
||||
- name: HIPSOLVER_PIPELINE_ID
|
||||
value: 84
|
||||
- name: HIPSOLVER_TAGGED_PIPELINE_ID
|
||||
value: 52
|
||||
- name: HIPSPARSE_GFX942_TEST_PIPELINE_ID
|
||||
value: 195
|
||||
- name: HIPSPARSE_PIPELINE_ID
|
||||
value: 83
|
||||
- name: HIPSPARSE_TAGGED_PIPELINE_ID
|
||||
value: 14
|
||||
- name: HIPSPARSELT_GFX942_TEST_PIPELINE_ID
|
||||
value: 200
|
||||
- name: HIPSPARSELT_PIPELINE_ID
|
||||
value: 104
|
||||
- name: HIPSPARSELT_TAGGED_PIPELINE_ID
|
||||
value: 53
|
||||
- name: HIPTENSOR_GFX942_TEST_PIPELINE_ID
|
||||
value: 192
|
||||
- name: HIPTENSOR_PIPELINE_ID
|
||||
value: 105
|
||||
- name: HIPTENSOR_TAGGED_PIPELINE_ID
|
||||
value: 56
|
||||
- name: LLVM_PROJECT_PIPELINE_ID
|
||||
value: 2
|
||||
- name: LLVM_PROJECT_TAGGED_PIPELINE_ID
|
||||
value: 8
|
||||
- name: MIOPEN_PIPELINE_ID
|
||||
value: 108
|
||||
- name: MIOPEN_TAGGED_PIPELINE_ID
|
||||
value: 58
|
||||
- name: MIVISIONX_PIPELINE_ID
|
||||
value: 80
|
||||
- name: MIVISIONX_TAGGED_PIPELINE_ID
|
||||
value: 18
|
||||
- name: OMNIPERF_PIPELINE_ID
|
||||
value: 241
|
||||
- name: OMNIPERF_TAGGED_PIPELINE_ID
|
||||
value: 242
|
||||
- name: OMNITRACE_PIPELINE_ID
|
||||
value: 253
|
||||
- name: OMNITRACE_TAGGED_PIPELINE_ID
|
||||
value: 252
|
||||
- name: RCCL_GFX942_TEST_PIPELINE_ID
|
||||
value: 184
|
||||
- name: RCCL_PIPELINE_ID
|
||||
value: 107
|
||||
- name: RCCL_TAGGED_PIPELINE_ID
|
||||
value: 15
|
||||
- name: RDC_PIPELINE_ID
|
||||
value: 100
|
||||
- name: RDC_TAGGED_PIPELINE_ID
|
||||
value: 59
|
||||
- name: ROCAL_PIPELINE_ID
|
||||
value: 151
|
||||
- name: ROCALUTION_GFX942_TEST_PIPELINE_ID
|
||||
value: 196
|
||||
- name: ROCALUTION_PIPELINE_ID
|
||||
value: 89
|
||||
- name: ROCALUTION_TAGGED_PIPELINE_ID
|
||||
value: 16
|
||||
- name: ROCBLAS_GFX942_TEST_PIPELINE_ID
|
||||
value: 185
|
||||
- name: ROCBLAS_PIPELINE_ID
|
||||
value: 85
|
||||
- name: ROCBLAS_TAGGED_PIPELINE_ID
|
||||
value: 32
|
||||
- name: ROCDBGAPI_PIPELINE_ID
|
||||
value: 135
|
||||
- name: ROCDBGAPI_TAGGED_PIPELINE_ID
|
||||
value: 17
|
||||
- name: ROCDECODE_PIPELINE_ID
|
||||
value: 79
|
||||
- name: ROCDECODE_TAGGED_PIPELINE_ID
|
||||
value: 21
|
||||
- name: ROCFFT_GFX942_TEST_PIPELINE_ID
|
||||
value: 189
|
||||
- name: ROCFFT_PIPELINE_ID
|
||||
value: 120
|
||||
- name: ROCFFT_TAGGED_PIPELINE_ID
|
||||
value: 19
|
||||
- name: ROCGDB_PIPELINE_ID
|
||||
value: 134
|
||||
- name: ROCGDB_TAGGED_PIPELINE_ID
|
||||
value: 50
|
||||
- name: ROCJPEG_PIPELINE_ID
|
||||
value: 262
|
||||
- name: ROCJPEG_TAGGED_PIPELINE_ID
|
||||
value: 263
|
||||
- name: ROCM_BANDWIDTH_TEST_PIPELINE_ID
|
||||
value: 88
|
||||
- name: ROCM_BANDWIDTH_TEST_TAGGED_PIPELINE_ID
|
||||
value: 23
|
||||
- name: ROCM_CMAKE_PIPELINE_ID
|
||||
value: 6
|
||||
- name: ROCM_CMAKE_TAGGED_PIPELINE_ID
|
||||
value: 7
|
||||
- name: ROCM_CORE_PIPELINE_ID
|
||||
value: 103
|
||||
- name: ROCM_CORE_TAGGED_PIPELINE_ID
|
||||
value: 22
|
||||
- name: ROCM_EXAMPLES_GFX942_TEST_PIPELINE_ID
|
||||
value: 204
|
||||
- name: ROCM_EXAMPLES_PIPELINE_ID
|
||||
value: 216
|
||||
- name: ROCM_EXAMPLES_TAGGED_PIPELINE_ID
|
||||
value: 245
|
||||
- name: ROCM_SMI_LIB_PIPELINE_ID
|
||||
value: 96
|
||||
- name: ROCM_SMI_LIB_TAGGED_PIPELINE_ID
|
||||
value: 47
|
||||
- name: ROCMINFO_PIPELINE_ID
|
||||
value: 91
|
||||
- name: ROCMINFO_TAGGED_PIPELINE_ID
|
||||
value: 27
|
||||
- name: ROCMLIR_PIPELINE_ID
|
||||
value: 229
|
||||
- name: ROCMLIR_TAGGED_PIPELINE_ID
|
||||
value: 62
|
||||
- name: ROCMVALIDATIONSUITE_PIPELINE_ID
|
||||
value: 106
|
||||
- name: ROCMVALIDATIONSUITE_TAGGED_PIPELINE_ID
|
||||
value: 43
|
||||
- name: ROCPRIM_GFX942_TEST_PIPELINE_ID
|
||||
value: 180
|
||||
- name: ROCPRIM_PIPELINE_ID
|
||||
value: 82
|
||||
- name: ROCPRIM_TAGGED_PIPELINE_ID
|
||||
value: 20
|
||||
- name: ROCPROFILER_GFX942_TEST_PIPELINE_ID
|
||||
value: 190
|
||||
value: 273
|
||||
- name: ROCPROFILER_COMPUTE_PIPELINE_ID
|
||||
value: 257
|
||||
- name: ROCPROFILER_COMPUTE_TAGGED_PIPELINE_ID
|
||||
value: 258
|
||||
- name: ROCPROFILER_REGISTER_PIPELINE_ID
|
||||
value: 1
|
||||
- name: ROCPROFILER_REGISTER_TAGGED_PIPELINE_ID
|
||||
value: 25
|
||||
- name: ROCPROFILER_SDK_PIPELINE_ID
|
||||
value: 246
|
||||
- name: ROCPROFILER_SDK_TAGGED_PIPELINE_ID
|
||||
value: 234
|
||||
- name: ROCPROFILER_SYSTEMS_PIPELINE_ID
|
||||
value: 255
|
||||
- name: ROCPROFILER_SYSTEMS_TAGGED_PIPELINE_ID
|
||||
value: 254
|
||||
- name: ROCPROFILER_PIPELINE_ID
|
||||
value: 143
|
||||
- name: ROCPROFILER_TAGGED_PIPELINE_ID
|
||||
value: 28
|
||||
- name: ROCPYDECODE_PIPELINE_ID
|
||||
value: 239
|
||||
- name: ROCPYDECODE_TAGGED_PIPELINE_ID
|
||||
value: 232
|
||||
- name: ROCR_DEBUG_AGENT_PIPELINE_ID
|
||||
value: 136
|
||||
- name: ROCR_DEBUG_AGENT_TAGGED_PIPELINE_ID
|
||||
value: 29
|
||||
- name: ROCR_RUNTIME_PIPELINE_ID
|
||||
value: 10
|
||||
- name: ROCR_RUNTIME_TAGGED_PIPELINE_ID
|
||||
value: 24
|
||||
- name: ROCRAND_GFX942_TEST_PIPELINE_ID
|
||||
value: 183
|
||||
- name: ROCRAND_PIPELINE_ID
|
||||
value: 95
|
||||
- name: ROCRAND_TAGGED_PIPELINE_ID
|
||||
value: 41
|
||||
- name: ROCSOLVER_GFX942_TEST_PIPELINE_ID
|
||||
value: 199
|
||||
value: 274
|
||||
- name: ROCSOLVER_PIPELINE_ID
|
||||
value: 81
|
||||
- name: ROCSOLVER_TAGGED_PIPELINE_ID
|
||||
value: 55
|
||||
- name: ROCSPARSE_GFX942_TEST_PIPELINE_ID
|
||||
value: 191
|
||||
- name: ROCSPARSE_PIPELINE_ID
|
||||
value: 98
|
||||
- name: ROCSPARSE_TAGGED_PIPELINE_ID
|
||||
value: 67
|
||||
- name: ROCT_THUNK_INTERFACE_PIPELINE_ID
|
||||
value: 3
|
||||
- name: ROCT_THUNK_INTERFACE_TAGGED_PIPELINE_ID
|
||||
value: 9
|
||||
- name: ROCTHRUST_GFX942_TEST_PIPELINE_ID
|
||||
value: 194
|
||||
- name: ROCTHRUST_PIPELINE_ID
|
||||
value: 94
|
||||
- name: ROCTHRUST_TAGGED_PIPELINE_ID
|
||||
value: 26
|
||||
- name: ROCTRACER_GFX942_TEST_PIPELINE_ID
|
||||
value: 181
|
||||
value: 276
|
||||
- name: ROCTRACER_PIPELINE_ID
|
||||
value: 141
|
||||
- name: ROCTRACER_TAGGED_PIPELINE_ID
|
||||
value: 30
|
||||
- name: ROCWMMA_GFX942_TEST_PIPELINE_ID
|
||||
value: 193
|
||||
- name: ROCWMMA_PIPELINE_ID
|
||||
value: 109
|
||||
- name: ROCWMMA_TAGGED_PIPELINE_ID
|
||||
value: 57
|
||||
- name: RPP_GFX942_TEST_PIPELINE_ID
|
||||
value: 182
|
||||
- name: RPP_PIPELINE_ID
|
||||
value: 78
|
||||
- name: RPP_TAGGED_PIPELINE_ID
|
||||
value: 39
|
||||
- name: TRANSFERBENCH_PIPELINE_ID
|
||||
value: 265
|
||||
- name: TRANSFERBENCH_TAGGED_PIPELINE_ID
|
||||
value: 266
|
||||
- name: BOOST_DEPENDENCY_PIPELINE_ID
|
||||
value: 250
|
||||
|
||||
@@ -1,3 +1,44 @@
|
||||
GovReport
|
||||
MLPerf
|
||||
QKV
|
||||
summarization
|
||||
gpt
|
||||
openai
|
||||
oss
|
||||
MXFP
|
||||
SGLang
|
||||
VMware
|
||||
amd
|
||||
bdf
|
||||
compatiblity
|
||||
csv
|
||||
enum
|
||||
json
|
||||
subproject
|
||||
ROCpd
|
||||
rocpd
|
||||
STL
|
||||
XCCs
|
||||
chiplets
|
||||
hipRTC
|
||||
nvRTC
|
||||
warpSize
|
||||
Datacenter
|
||||
GST
|
||||
IET
|
||||
LTO
|
||||
MX
|
||||
Microscaling
|
||||
NANOO
|
||||
ROCprof
|
||||
affinitization
|
||||
amdclang
|
||||
benefitting
|
||||
demangled
|
||||
inlined
|
||||
microscaling
|
||||
roofline
|
||||
torchtitan
|
||||
AAC
|
||||
ABI
|
||||
ACE
|
||||
@@ -34,6 +75,7 @@ Autocast
|
||||
BARs
|
||||
BLAS
|
||||
BMC
|
||||
BabelStream
|
||||
Blit
|
||||
Blockwise
|
||||
Bluefield
|
||||
@@ -138,6 +180,7 @@ GDR
|
||||
GDS
|
||||
GEMM
|
||||
GEMMs
|
||||
GFLOPS
|
||||
GFortran
|
||||
GFXIP
|
||||
Gemma
|
||||
@@ -226,6 +269,7 @@ LM
|
||||
LSAN
|
||||
LSan
|
||||
LTS
|
||||
LanguageCrossEntropy
|
||||
LoRA
|
||||
MEM
|
||||
MERCHANTABILITY
|
||||
@@ -243,6 +287,7 @@ MMIOH
|
||||
MMU
|
||||
MNIST
|
||||
MPI
|
||||
MPT
|
||||
MSVC
|
||||
MVAPICH
|
||||
MVFFR
|
||||
@@ -259,6 +304,7 @@ Meta's
|
||||
Miniconda
|
||||
MirroredStrategy
|
||||
Mixtral
|
||||
MosaicML
|
||||
Multicore
|
||||
Multithreaded
|
||||
MyEnvironment
|
||||
@@ -267,6 +313,7 @@ NBIO
|
||||
NBIOs
|
||||
NCCL
|
||||
NCF
|
||||
NFS
|
||||
NIC
|
||||
NICs
|
||||
NLI
|
||||
@@ -329,6 +376,7 @@ PipelineParallel
|
||||
PnP
|
||||
PowerEdge
|
||||
PowerShell
|
||||
Pretrained
|
||||
Pretraining
|
||||
Profiler's
|
||||
PyPi
|
||||
@@ -494,6 +542,7 @@ ZenDNN
|
||||
accuracies
|
||||
activations
|
||||
addr
|
||||
ade
|
||||
ai
|
||||
alloc
|
||||
allocatable
|
||||
@@ -509,6 +558,7 @@ avx
|
||||
awk
|
||||
backend
|
||||
backends
|
||||
bb
|
||||
benchmarked
|
||||
benchmarking
|
||||
bfloat
|
||||
@@ -532,6 +582,7 @@ cd
|
||||
centos
|
||||
centric
|
||||
changelog
|
||||
checkpointing
|
||||
chiplet
|
||||
cmake
|
||||
cmd
|
||||
@@ -572,6 +623,7 @@ de
|
||||
deallocation
|
||||
debuggability
|
||||
debian
|
||||
deepseek
|
||||
denoise
|
||||
denoised
|
||||
denoises
|
||||
@@ -595,6 +647,7 @@ embeddings
|
||||
enablement
|
||||
encodings
|
||||
endfor
|
||||
endif
|
||||
endpgm
|
||||
enqueue
|
||||
env
|
||||
@@ -637,6 +690,7 @@ hipSPARSELt
|
||||
hipTensor
|
||||
hipamd
|
||||
hipblas
|
||||
hipcc
|
||||
hipcub
|
||||
hipfft
|
||||
hipfort
|
||||
@@ -695,6 +749,7 @@ migratable
|
||||
miopen
|
||||
miopengemm
|
||||
mivisionx
|
||||
mixtral
|
||||
mjx
|
||||
mkdir
|
||||
mlirmiopen
|
||||
@@ -836,6 +891,7 @@ subfolder
|
||||
subfolders
|
||||
submodule
|
||||
submodules
|
||||
subnet
|
||||
supercomputing
|
||||
symlink
|
||||
symlinks
|
||||
|
||||
7307
CHANGELOG.md
7307
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
@@ -1,81 +0,0 @@
|
||||
This XML file does not appear to have any style information associated with it. The document tree is shown below.
|
||||
<manifest>
|
||||
<remote name="gerritgit" fetch="ssh://gerritgit/" review="gerrit-git.amd.com"/>
|
||||
<remote name="lightning-ghemu" fetch="ssh://github-emu/AMD-Lightning-Internal"/>
|
||||
<remote name="rocm" fetch="https://github.com/ROCm"/>
|
||||
<remote name="rocm-ghemu" fetch="ssh://github-emu/AMD-ROCm-Internal"/>
|
||||
<default remote="gerritgit" revision="release/rocm-rel-6.4" sync-j="4" sync-c="true"/>
|
||||
<project name="AMDMIGraphX" remote="rocm" revision="908b94a3f0822a4fee89d99c3cfc51cd9c93f2f6" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="MIOpen" remote="rocm" revision="f10c6ed8085cfabf8877294ab44301d8180999e8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="MIVisionX" remote="rocm" revision="a2b69e5b30f2dbdf66055ec99a2b5559b572f7af" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="OpenCL-CLHPP" remote="rocm-ghemu" revision="6f7e82dee83aea7f277a4b874da309902ea51f6e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="OpenCL-Headers" remote="rocm-ghemu" revision="848d67b6fd471318816a81601d469b086487d18e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCR-Runtime" remote="rocm-ghemu" revision="1d9f08cabd33bd6302add72d0be2bfe0e64eea3a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCdbgapi" remote="rocm-ghemu" revision="59be7ff0aaafe82feb78f30990c8fdf62838cc98" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCgdb" remote="rocm-ghemu" revision="401bb21f2f3c72bbb90ccce12dc3ef481f9a1d8a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCmValidationSuite" remote="rocm" revision="5f1a9665f6241b0346c88cfd21a6073628da3593" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="Tensile" remote="rocm" revision="be49885fce2a61b600ae4593f1c2d00c8b4fa11e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="TransferBench" remote="rocm" revision="3ea2f226ec818158ba97e4ee0ec0b589f13f4641" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="amdsmi" remote="rocm-ghemu" revision="e6a209ef809f1b09a424572afd685ec754a9042b" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="aomp" path="openmp-extras/aomp" remote="lightning-ghemu" revision="24932c59c0759a57ee52d327d9a10a2e466e35a7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" remote="lightning-ghemu" revision="6f8038ada9dec082ea091d30c98c0834669d12a1" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="aqlprofile" remote="rocm-ghemu" revision="7fae75ec6bf7b1a631707ae859542d733f8a1f43" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="build-infra" path="ROCm" remote="rocm-ghemu" revision="811ec9cc6d1588bf66619365b9b4db96ac6acf68" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra"/>
|
||||
<project name="clr" remote="rocm-ghemu" revision="a1adcfdd44f4560c0268e36c8afeb94f760dc963" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="composable_kernel" remote="rocm" revision="a8c5bd9b9ad950c3e742877e01cb784da91664e3" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="compute/ec/hip-examples" path="HIP-Examples" revision="41b0cff8077a25390c2bbda827eb9f6f37ec1ef3" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="compute/ec/hip-examples-private" path="hip-examples-private" revision="dc69edb405804987753735a369478503d82ce9c2" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="compute/ec/jenkins-utils" path="jenkins-utils" revision="bb517b014ff055b62d3860addc23ddd06b0c3e6e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/ml-framework-ci" path="ml-framework-ci" revision="83440e22ebf1e9443b6df737224c1e5e2b91e0c4" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,framework"/>
|
||||
<project name="compute/ec/packaging/meta" path="meta" revision="c7cffa2e4199da1fd68b8b3568282dd59d49a4df" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/prototype" path="build" revision="d71a2766e11e057e5c698caea8fc4ebc0f72cb3e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/rocm_bandwidth_test" path="rocm_bandwidth_test" revision="84b8ddd2686be9bd3e438126b44e6bb10d94d522" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="flang" path="openmp-extras/flang" remote="lightning-ghemu" revision="390169508a03cecf85d43f5cee41e223355f598f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="half" remote="rocm" revision="1ddada225144cac0de8f6b5c0dd9acffd99a2e68" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hip" remote="rocm-ghemu" revision="22b0b2eb9a09e30dca11b213872127f9caa2e1e7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="hip-tests" remote="rocm-ghemu" revision="dc28111737706aad93e38c2f746ccbc13dbf1b80" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="hipBLAS" remote="rocm" revision="0a335435e9c8a833d7106e4ae5057eb58cea2fef" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipBLAS-common" remote="rocm" revision="7c1566ba4628e777b91511242899b6df48555d04" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipBLASLt" remote="rocm" revision="4d62e135cfb4008cf7b508995cad347a1bc750c8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipCUB" remote="rocm" revision="a6005943c5804535990429925318e7900eb6e801" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipFFT" remote="rocm" revision="396169c84a2bb3c7ed7245caefe66002138e7c6c" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipRAND" remote="rocm" revision="d2516cc199690fd91abfdc5908ecfd88e3553067" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSOLVER" remote="rocm" revision="ca0de3c9c95df4345b76cd8a56e72c84b7d5fc79" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSPARSE" remote="rocm" revision="a6c62e48eb8a2326475f7bbb4705c5b926a5edc8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSPARSELt" remote="rocm" revision="f3f4f590a49ae9f9c9ce1451c42db4c2bfd00eed" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipTensor" remote="rocm" revision="e5529b92914be79e4887a92b48b30f88b616c9a5" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipfort" remote="rocm" revision="f3d6aa3e8657d665a43fa2815ca2e49ce39a464a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipify" path="HIPIFY" remote="lightning-ghemu" revision="ed0de49132211c6ddbd40f5cd89b5841e832ac3d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="hipother" remote="rocm-ghemu" revision="49b1588f834dbe1a4db1bddb3647a91b15f618b8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="llvm-project" remote="lightning-ghemu" revision="aa0c041cb49bb50af268504907b7899fec59ae4e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="rccl" remote="rocm" revision="12f8f61f3a5db87bf158c60fdd5e38a32c903b08" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rdc" remote="rocm-ghemu" revision="0224310c872df0fae56ffc883c50c7f47dc82870" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocAL" remote="rocm" revision="373ef865aca43528559e7a9134f09e49a9e9b7c6" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocALUTION" remote="rocm" revision="cb256de3574a4fcbc6a52ed5986b787173cd6dc2" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocBLAS" remote="rocm" revision="80e5394d6a68901ce48b03da47b33b1e69d58be7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocDecode" remote="rocm" revision="a2a7b63cad8f90a94e21232b44460a8fb2d52304" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocFFT" remote="rocm" revision="058ba87fdcfdae334dbc8dbe048955b248e9328a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocJPEG" remote="rocm" revision="73d36d35d90137ffbfcec276bdf973823ef0c0b9" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocPRIM" remote="rocm" revision="d8771ec18ad45c4d697800c22fb21241f22a915f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocPyDecode" remote="rocm" revision="848e49d29d4d6173fb4b57a9223ce68c049baa28" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocRAND" remote="rocm" revision="4d5d3a88d1898705dadf5c06e7b0400d51a13c36" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSHMEM" remote="rocm" revision="7702b3c0f3f41baf6a80aa6b22fa90dec1a6801e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSOLVER" remote="rocm" revision="db754e3f55daab54abb86f17cd6b4066c504e163" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSPARSE" remote="rocm" revision="4953add0aee37ad26700e8bcd6defbfa6b3a4d08" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocThrust" remote="rocm" revision="6bf2777019827e1a2898547ced9a03bf5024ed7d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocWMMA" remote="rocm" revision="1a5b6231663fcf3e00abf790aeae843278f16a65" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocm-cmake" remote="rocm" revision="ecc716b97c2239cff00422ed7a43cd52a0839a0e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocm-core" remote="rocm-ghemu" revision="73dae9c82ace4fb8e1e4028f86ff0365f21c9f51" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="rocm-examples" remote="rocm" revision="3bbd2987a3b46cfd2c8348c2317042f3ad604e38" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocm_smi_lib" remote="rocm-ghemu" revision="1f242d314916336d6ce5c731f486edfaa8f0b987" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocminfo" remote="rocm-ghemu" revision="6ea2ba38c8e1ab2899acf66878148b1192fd0bee" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler" remote="rocm-ghemu" revision="40da7312a06f8052f5c148a4709cab64686f881d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-compute" remote="rocm" revision="7b25d958b4e030ea64a24ed0a62dcac1e48193ab" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-register-internal" path="rocprofiler-register" remote="rocm-ghemu" revision="7c6cd44f637d400b50b803b0b351be302ad6827d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-sdk-internal" path="rocprofiler-sdk" remote="rocm-ghemu" revision="e8e49fe76971000a42a5a177d9a727d16dd0ebcf" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-systems" remote="rocm" revision="2e945e4a08781e13a822f568814e2c434fd8858f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocr_debug_agent" remote="rocm-ghemu" revision="9eec1a52a36b5203bbac54a1b442fe9a45b6a43e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="roctracer" remote="rocm-ghemu" revision="f55a6943816641c081aa167c8a45904ddae2ba5e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rpp" remote="rocm" revision="5fb204ca7018b87889e061b720c5b06f6b9bce9b" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="spirv-llvm-translator" path="llvm-project/llvm/projects/SPIRV-LLVM-Translator" remote="lightning-ghemu" revision="ae12ddbec86765df369b18ac764e170082079819" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
</manifest>
|
||||
143
README.md
143
README.md
@@ -19,142 +19,17 @@ ROCm supports programming models, such as OpenMP and OpenCL, and includes all ne
|
||||
source software compilers, debuggers, and libraries. ROCm is fully integrated into machine learning
|
||||
(ML) frameworks, such as PyTorch and TensorFlow.
|
||||
|
||||
## Getting the ROCm Source Code
|
||||
> [!IMPORTANT]
|
||||
> A new open source build platform for ROCm is under development at
|
||||
> https://github.com/ROCm/TheRock, featuring a unified CMake build with bundled
|
||||
> dependencies, Windows support, and more.
|
||||
>
|
||||
> The instructions below describe the prior process for building from source
|
||||
> which will be replaced once TheRock is mature enough.
|
||||
|
||||
AMD ROCm is built from open source software. It is, therefore, possible to modify the various components of ROCm by downloading the source code and rebuilding the components. The source code for ROCm components can be cloned from each of the GitHub repositories using git. For easy access to download the correct versions of each of these tools, the ROCm repository contains a repo manifest file called [default.xml](./default.xml). You can use this manifest file to download the source code for ROCm software.
|
||||
## Getting and Building ROCm from Source
|
||||
|
||||
### Installing the repo tool
|
||||
|
||||
The repo tool from Google allows you to manage multiple git repositories simultaneously. Run the following commands to install the repo tool:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/bin/
|
||||
curl https://storage.googleapis.com/git-repo-downloads/repo > ~/bin/repo
|
||||
chmod a+x ~/bin/repo
|
||||
```
|
||||
|
||||
**Note:** The ```~/bin/``` folder is used as an example. You can specify a different folder to install the repo tool into if you desire.
|
||||
|
||||
### Installing git-lfs
|
||||
|
||||
Some ROCm projects use the Git Large File Storage (LFS) format that may require you to install git-lfs. Refer to [Git Large File Storage](https://github.com/git-lfs/git-lfs/blob/main/INSTALLING.md) for more information. For example, to install git-lfs for Ubuntu, use the following command:
|
||||
|
||||
```bash
|
||||
sudo apt-get install git-lfs
|
||||
```
|
||||
|
||||
### Downloading the ROCm source code
|
||||
|
||||
The following example shows how to use the repo tool to download the ROCm source code. If you choose a directory other than ~/bin/ to install the repo tool, you must use that chosen directory in the code as shown below:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/ROCm/
|
||||
cd ~/ROCm/
|
||||
export ROCM_VERSION=6.4.0
|
||||
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
|
||||
~/bin/repo sync
|
||||
```
|
||||
|
||||
**Note:** Using this sample code will cause the repo tool to download the open source code associated with the specified ROCm release. Ensure that you have ssh-keys configured on your machine for your GitHub ID prior to the download as explained at [Connecting to GitHub with SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh).
|
||||
|
||||
## Building the ROCm source code
|
||||
|
||||
Each ROCm component repository contains directions for building that component, such as the rocSPARSE documentation [Installation and Building for Linux](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html). Refer to the specific component documentation for instructions on building the repository.
|
||||
|
||||
Each release of the ROCm software supports specific hardware and software configurations. Refer to [System requirements (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) for the current supported hardware and OS.
|
||||
|
||||
## Build ROCm from source
|
||||
|
||||
The Build will use as many processors as it can find to build in parallel. Some of the compiles can consume as much as 10GB of RAM, so make sure you have plenty of Swap Space !
|
||||
|
||||
By default the ROCm build will compile for all supported GPU architectures and will take approximately 500 CPU hours.
|
||||
The Build time will reduce significantly if we limit the GPU Architecture/s against which we need to build by using the environment variable GPU_ARCHS as mentioned below.
|
||||
|
||||
```bash
|
||||
# --------------------------------------
|
||||
# Step1: clone source code
|
||||
# --------------------------------------
|
||||
|
||||
mkdir -p ~/WORKSPACE/ # Or any folder name other than WORKSPACE
|
||||
cd ~/WORKSPACE/
|
||||
export ROCM_VERSION=6.4.0
|
||||
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
|
||||
~/bin/repo sync
|
||||
|
||||
# --------------------------------------
|
||||
# Step 2: Prepare build environment
|
||||
# --------------------------------------
|
||||
|
||||
# Option 1: Start a docker container
|
||||
# Pulling required base docker images:
|
||||
# Ubuntu22.04 built from ROCm/tools/rocm-build/docker/ubuntu22/Dockerfile
|
||||
docker pull rocm/rocm-build-ubuntu-22.04:6.4
|
||||
# Ubuntu24.04 built from ROCm/tools/rocm-build/docker/ubuntu24/Dockerfile
|
||||
docker pull rocm/rocm-build-ubuntu-24.04:6.4
|
||||
|
||||
# Start docker container and mount the source code folder:
|
||||
docker run -ti \
|
||||
-e ROCM_VERSION=${ROCM_VERSION} \
|
||||
-e CCACHE_DIR=$HOME/.ccache \
|
||||
-e CCACHE_ENABLED=true \
|
||||
-e DOCK_WORK_FOLD=/src \
|
||||
-w /src \
|
||||
-v $PWD:/src \
|
||||
-v /etc/passwd:/etc/passwd \
|
||||
-v /etc/shadow:/etc/shadow \
|
||||
-v ${HOME}/.ccache:${HOME}/.ccache \
|
||||
-u $(id -u):$(id -g) \
|
||||
<replace_with_required_ubuntu_base_docker_image> bash
|
||||
|
||||
# Option 2: Install required packages into the host machine
|
||||
# For ubuntu22.04 system
|
||||
cd ROCm/tools/rocm-build/docker/ubuntu22
|
||||
cp * /tmp && cd /tmp
|
||||
bash install-prerequisites.sh
|
||||
# For ubuntu24.04 system
|
||||
cd ROCm/tools/rocm-build/docker/ubuntu24
|
||||
cp * /tmp && cd /tmp
|
||||
bash install-prerequisites.sh
|
||||
|
||||
# --------------------------------------
|
||||
# Step 3: Run build command line
|
||||
# --------------------------------------
|
||||
|
||||
# Select GPU targets before building:
|
||||
# When GPU_ARCHS is not set, default GPU targets supported by ROCm6.1 will be used.
|
||||
# To build against a subset of GFX architectures you can use the below env variable.
|
||||
# Support MI300 (gfx940, gfx941, gfx942).
|
||||
export GPU_ARCHS="gfx942" # Example
|
||||
export GPU_ARCHS="gfx940;gfx941;gfx942" # Example
|
||||
|
||||
# Pick and run build commands in the docker container:
|
||||
# Build rocm-dev packages
|
||||
make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} rocm-dev
|
||||
# Build all ROCm packages
|
||||
make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} all
|
||||
# list all ROCm components to find required components
|
||||
make -f ROCm/tools/rocm-build/ROCm.mk list_components
|
||||
# Build a single ROCm packages
|
||||
make -f ROCm/tools/rocm-build/ROCm.mk T_rocblas
|
||||
|
||||
# Find built packages in ubuntu22.04:
|
||||
out/ubuntu-22.04/22.04/deb/
|
||||
# Find built packages in ubuntu24.04:
|
||||
out/ubuntu-24.04/24.04/deb/
|
||||
|
||||
# Find built logs in ubuntu22.04:
|
||||
out/ubuntu-22.04/22.04/logs/
|
||||
# Find built logs in ubuntu24.04:
|
||||
out/ubuntu-24.04/24.04/logs/
|
||||
# All logs pertaining to failed components, end with .errrors extension.
|
||||
out/ubuntu-22.04/22.04/logs/rocblas.errors # Example
|
||||
# All logs pertaining to building components, end with .inprogress extension.
|
||||
out/ubuntu-22.04/22.04/logs/rocblas.inprogress # Example
|
||||
# All logs pertaining to passed components, use the component names.
|
||||
out/ubuntu-22.04/22.04/logs/rocblas # Example
|
||||
```
|
||||
|
||||
Note: [Overview for ROCm.mk](tools/rocm-build/README.md)
|
||||
Please use [TheRock](https://github.com/ROCm/TheRock) build system to build ROCm from source.
|
||||
|
||||
## ROCm documentation
|
||||
|
||||
|
||||
599
RELEASE.md
599
RELEASE.md
@@ -1,599 +0,0 @@
|
||||
<!-- Do not edit this file! -->
|
||||
<!-- This file is autogenerated with -->
|
||||
<!-- tools/autotag/tag_script.py -->
|
||||
<!-- Disable lints since this is an auto-generated file. -->
|
||||
<!-- markdownlint-disable blanks-around-headers -->
|
||||
<!-- markdownlint-disable no-duplicate-header -->
|
||||
<!-- markdownlint-disable no-blanks-blockquote -->
|
||||
<!-- markdownlint-disable ul-indent -->
|
||||
<!-- markdownlint-disable no-trailing-spaces -->
|
||||
<!-- markdownlint-disable reference-links-images -->
|
||||
<!-- markdownlint-disable no-missing-space-atx -->
|
||||
<!-- spellcheck-disable -->
|
||||
# ROCm 6.4.1 release notes
|
||||
|
||||
The release notes provide a summary of notable changes since the previous ROCm release.
|
||||
|
||||
- [Release highlights](#release-highlights)
|
||||
|
||||
- [Operating system and hardware support changes](#operating-system-and-hardware-support-changes)
|
||||
|
||||
- [ROCm components versioning](#rocm-components)
|
||||
|
||||
- [Detailed component changes](#detailed-component-changes)
|
||||
|
||||
- [ROCm known issues](#rocm-known-issues)
|
||||
|
||||
- [ROCm resolved issues](#rocm-resolved-issues)
|
||||
|
||||
- [ROCm upcoming changes](#rocm-upcoming-changes)
|
||||
|
||||
```{note}
|
||||
If you’re using Radeon™ PRO or Radeon GPUs in a workstation setting with a display connected, see the [Use ROCm on Radeon GPUs](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility/native_linux/native_linux_compatibility.html)
|
||||
documentation to verify compatibility and system requirements.
|
||||
```
|
||||
|
||||
## Release highlights
|
||||
|
||||
The following are notable new features and improvements in ROCm 6.4.1. For changes to individual components, see
|
||||
[Detailed component changes](#detailed-component-changes).
|
||||
|
||||
### Addition of DPX partition mode under NPS2 memory mode
|
||||
|
||||
AMD Instinct MI300X now supports DPX partition mode under NPS2 memory mode. For more partitioning information, see the [Deep dive into the MI300 compute and memory partition modes](https://rocm.blogs.amd.com/software-tools-optimization/compute-memory-modes/README.html) blog and [AMD Instinct MI300X system optimization](https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#change-gpu-partition-modes).
|
||||
|
||||
### Introducing the ROCm Data Science toolkit
|
||||
The ROCm Data Science toolkit (or ROCm-DS) is an open-source software collection for high-performance data science applications built on the core ROCm platform. You can leverage ROCm-DS to accelerate both new and existing data science workloads, allowing you to execute intensive applications with larger datasets at lightning speed. ROCm-DS is in an early access state. Running production workloads is not recommended. For more information, see [AMD ROCm-DS Documentation](https://rocm.docs.amd.com/projects/rocm-ds/en/latest/index.html).
|
||||
|
||||
### ROCm Offline Installer Creator updates
|
||||
|
||||
The ROCm Offline Installer Creator 6.4.1 now allows you to use the SPACEBAR or ENTER keys for menu item selection in the GUI. It also adds support for Debian 12 and fixes an issue for “full” mode RHEL offline installer creation, where GDM packages were uninstalled during offline installation. See [ROCm Offline Installer Creator](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/rocm-offline-installer.html) for more information.
|
||||
|
||||
### ROCm Runfile Installer updates
|
||||
|
||||
The ROCm Runfile Installer 6.4.1 adds the following improvements:
|
||||
- Relaxed version checks for installation on different distributions. Provided the dependencies are not installed by the Runfile Installer, you can target installation for a different path from the host system running the installer. For example, the installer can run on a system using Ubuntu 22.04 and install to a partition/system that is using Ubuntu 24.04.
|
||||
- Performance improvements for detecting a previous ROCm install.
|
||||
- Removal of the extra `opt` directory created for the target during the ROCm installation. For example, installing to `target=/home/amd` now installs ROCm to `/home/amd/rocm-6.4.1` and not `/home/amd/opt/rocm-6.4.1`. For installs using `target=/`, the installation will continue to use `/opt/`.
|
||||
- The Runfile Installer can be used to uninstall any Runfile-based installation of the driver.
|
||||
- In the CLI interface, The `postrocm` argument can now be run separately from the `rocm` argument. In cases where `postrocm` was missed from the initial ROCm install, `postrocm` can now be run on the same target folder. For example, if you install ROCm 6.4.1 using: `install.run target=/myrocm rocm` you can run the post-installation separately using the command `install.run target=/myrocm/rocm-6.4.1 postrocm`.
|
||||
|
||||
For more information, see [ROCm Runfile Installer](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/rocm-runfile-installer.html).
|
||||
|
||||
### ROCm documentation updates
|
||||
|
||||
ROCm documentation continues to be updated to provide clearer and more comprehensive guidance for a wider variety of user needs and use cases.
|
||||
|
||||
* [Tutorials for AI developers](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/) have been expanded with five
|
||||
new tutorials. These tutorials are Jupyter notebook-based, easy-to-follow documents. They are ideal for AI developers who want to learn about specific topics, including inference, fine-tuning, and training.
|
||||
* The [Training a model with LLM Foundry](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry.html) performance testing guide has been added. This guide describes how to use the preconfigured [ROCm/pytorch-training](https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5) training environment and [https://github.com/ROCm/MAD](https://github.com/ROCm/MAD) to test the training performance of the LLM Foundry framework on AMD Instinct MI325X and MI300X accelerators using the [MPT-30B](https://huggingface.co/mosaicml/mpt-30b) model.
|
||||
* The [Training a model with PyTorch](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html) performance testing guide has been updated to feature the latest [ROCm/pytorch-training](https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5) Docker image (a preconfigured training environment with ROCm and PyTorch). Support for [Llama 3.3 70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) has been added.
|
||||
* The [Training a model with JAX MaxText](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.html) performance testing guide has been updated to feature the latest [ROCm/jax-training](https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.5/images/sha256-4e0516358a227cae8f552fb866ec07e2edcf244756f02e7b40212abfbab5217b) Docker image (a preconfigured training environment with ROCm, JAX, and [MaxText](https://github.com/AI-Hypercomputer/maxtext)). Support for [Llama 3.3 70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) has been added.
|
||||
* The [vLLM inference performance testing](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference/vllm-benchmark.html?model=pyt_vllm_qwq-32b) guide has been updated to feature the latest [ROCm/vLLM](https://hub.docker.com/layers/rocm/vllm/instinct_main/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845) Docker image (a preconfigured environment for inference with ROCm and [vLLM](https://docs.vllm.ai/en/latest/)). Support for the [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) model has been added.
|
||||
* The [PyTorch inference performance testing](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference/pytorch-inference-benchmark.html?model=pyt_clip_inference) guide has been added, featuring the [ROCm/PyTorch](https://hub.docker.com/layers/rocm/pytorch/latest/images/sha256-ab1d350b818b90123cfda31363019d11c0d41a8f12a19e3cb2cb40cf0261137d) Docker image (a preconfigured inference environment with ROCm and PyTorch) with initial support for the [CLIP](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) and [Chai-1](https://huggingface.co/chaidiscovery/chai-1) models.
|
||||
* The [Data types and precision support](https://rocm.docs.amd.com/en/latest/reference/precision-support.html) topic has been updated with new information in the library's precision support list.
|
||||
* The deep learning frameworks compatibility pages have been updated with new information and are reorganized, making them easier to review. For more information, see [PyTorch compatibility](https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/pytorch-compatibility.html), [TensorFlow compatibility](https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/tensorflow-compatibility.html), and [JAX compatibility](https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/jax-compatibility.html).
|
||||
|
||||
## Operating system and hardware support changes
|
||||
|
||||
Operating system and hardware support remain unchanged in this release.
|
||||
|
||||
See the [Compatibility
|
||||
matrix](../../docs/compatibility/compatibility-matrix.rst)
|
||||
for more information about operating system and hardware compatibility.
|
||||
|
||||
## ROCm components
|
||||
|
||||
The following table lists the versions of ROCm components for ROCm 6.4.1, including any version
|
||||
changes from 6.4.0 to 6.4.1. Click the component's updated version to go to a list of its changes.
|
||||
Click {fab}`github` to go to the component's source code on GitHub.
|
||||
|
||||
<div class="pst-scrollable-table-container">
|
||||
<table id="rocm-rn-components" class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Category</th>
|
||||
<th>Group</th>
|
||||
<th>Name</th>
|
||||
<th>Version</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<colgroup>
|
||||
<col span="1">
|
||||
<col span="1">
|
||||
</colgroup>
|
||||
<tbody class="rocm-components-libs rocm-components-ml">
|
||||
<tr>
|
||||
<th rowspan="9">Libraries</th>
|
||||
<th rowspan="9">Machine learning and computer vision</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/composable_kernel/en/docs-6.4.0/index.html">Composable Kernel</a></td>
|
||||
<td>1.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/composable_kernel"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/AMDMIGraphX/en/docs-6.4.0/index.html">MIGraphX</a></td>
|
||||
<td>2.12.0</td>
|
||||
<td><a href="https://github.com/ROCm/AMDMIGraphX"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/MIOpen/en/docs-6.4.0/index.html">MIOpen</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/MIOpen"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/MIVisionX/en/docs-6.4.0/index.html">MIVisionX</a></td>
|
||||
<td>3.2.0</td>
|
||||
<td><a href="https://github.com/ROCm/MIVisionX"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocAL/en/docs-6.4.0/index.html">rocAL</a></td>
|
||||
<td>2.2.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocAL"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocDecode/en/docs-6.4.0/index.html">rocDecode</a></td>
|
||||
<td>0.10.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocDecode"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocJPEG/en/docs-6.4.0/index.html">rocJPEG</a></td>
|
||||
<td>0.8.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocJPEG"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocPyDecode/en/docs-6.4.0/index.html">rocPyDecode</a></td>
|
||||
<td>0.3.1</td>
|
||||
<td><a href="https://github.com/ROCm/rocPyDecode"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rpp/en/docs-6.4.0/index.html">RPP</a></td>
|
||||
<td>1.9.10</td>
|
||||
<td><a href="https://github.com/ROCm/rpp"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-libs rocm-components-communication tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="2"></th>
|
||||
<th rowspan="2">Communication</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rccl/en/docs-6.4.0/index.html">RCCL</a></td>
|
||||
<td>2.22.3 ⇒ <a href="#rccl-2-22-3">2.22.3</td>
|
||||
<td><a href="https://github.com/ROCm/rccl"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://github.com/ROCm/rocSHMEM">rocSHMEM</a></td>
|
||||
<td>2.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocSHMEM"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-libs rocm-components-math tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="16"></th>
|
||||
<th rowspan="16">Math</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.4.0/index.html">hipBLAS</a></td>
|
||||
<td>2.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipBLAS"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipBLASLt/en/docs-6.4.0/index.html">hipBLASLt</a></td>
|
||||
<td>0.12.0 ⇒ <a href="#hipblaslt-0-12-1">0.12.1</td>
|
||||
<td><a href="https://github.com/ROCm/hipBLASLt"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipFFT/en/docs-6.4.0/index.html">hipFFT</a></td>
|
||||
<td>1.0.18</td>
|
||||
<td><a href="https://github.com/ROCm/hipFFT"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipfort/en/docs-6.4.0/index.html">hipfort</a></td>
|
||||
<td>0.6.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipfort"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipRAND/en/docs-6.4.0/index.html">hipRAND</a></td>
|
||||
<td>2.12.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipRAND"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipSOLVER/en/docs-6.4.0/index.html">hipSOLVER</a></td>
|
||||
<td>2.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipSPARSE/en/docs-6.4.0/index.html">hipSPARSE</a></td>
|
||||
<td>3.2.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipSPARSE"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipSPARSELt/en/docs-6.4.0/index.html">hipSPARSELt</a></td>
|
||||
<td>0.2.3</td>
|
||||
<td><a href="https://github.com/ROCm/hipSPARSELt"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocALUTION/en/docs-6.4.0/index.html">rocALUTION</a></td>
|
||||
<td>3.2.2 ⇒ <a href="#rocalution-3-2-3">3.2.3</td></td>
|
||||
<td><a href="https://github.com/ROCm/rocALUTION"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocBLAS/en/docs-6.4.0/index.html">rocBLAS</a></td>
|
||||
<td>4.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocBLAS"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocFFT/en/docs-6.4.0/index.html">rocFFT</a></td>
|
||||
<td>1.0.32</td>
|
||||
<td><a href="https://github.com/ROCm/rocFFT"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocRAND/en/docs-6.4.0/index.html">rocRAND</a></td>
|
||||
<td>3.3.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocRAND"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocSOLVER/en/docs-6.4.0/index.html">rocSOLVER</a></td>
|
||||
<td>3.28.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocSPARSE/en/docs-6.4.0/index.html">rocSPARSE</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocSPARSE"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocWMMA/en/docs-6.4.0/index.html">rocWMMA</a></td>
|
||||
<td>1.7.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocWMMA"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/Tensile/en/docs-6.4.0/src/index.html">Tensile</a></td>
|
||||
<td>4.43.0</td>
|
||||
<td><a href="https://github.com/ROCm/Tensile"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-libs rocm-components-primitives tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="4"></th>
|
||||
<th rowspan="4">Primitives</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipCUB/en/docs-6.4.0/index.html">hipCUB</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipCUB"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/hipTensor/en/docs-6.4.0/index.html">hipTensor</a></td>
|
||||
<td>1.5.0</td>
|
||||
<td><a href="https://github.com/ROCm/hipTensor"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocPRIM/en/docs-6.4.0/index.html">rocPRIM</a></td>
|
||||
<td>3.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocPRIM"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocThrust/en/docs-6.4.0/index.html">rocThrust</a></td>
|
||||
<td>3.3.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocThrust"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-tools rocm-components-system tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="7">Tools</th>
|
||||
<th rowspan="7">System management</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/amdsmi/en/docs-6.4.0/index.html">AMD SMI</a></td>
|
||||
<td>25.3.0 ⇒ <a href="#amd-smi-25-4-2">25.4.2</a></td>
|
||||
<td><a href="https://github.com/ROCm/amdsmi"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rdc/en/docs-6.4.0/index.html">ROCm Data Center Tool</a></td>
|
||||
<td>0.3.0 ⇒ <a href="#rocm-data-center-tool-0-3-0">0.3.0</td>
|
||||
<td><a href="https://github.com/ROCm/rdc"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocminfo/en/docs-6.4.0/index.html">rocminfo</a></td>
|
||||
<td>1.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocminfo"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocm_smi_lib/en/docs-6.4.0/index.html">ROCm SMI</a></td>
|
||||
<td>7.5.0 ⇒ <a href="#rocm-smi-7-5-0">7.5.0</a></td>
|
||||
<td><a href="https://github.com/ROCm/rocm_smi_lib"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/docs-6.4.0/index.html">ROCmValidationSuite</a></td>
|
||||
<td>1.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCmValidationSuite"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-tools rocm-components-perf">
|
||||
<tr>
|
||||
<th rowspan="6"></th>
|
||||
<th rowspan="6">Performance</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocm_bandwidth_test/en/docs-6.4.0/index.html">ROCm Bandwidth
|
||||
Test</a></td>
|
||||
<td>1.4.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocm_bandwidth_test/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-compute/en/docs-6.4.0/index.html">ROCm Compute Profiler</a></td>
|
||||
<td>3.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocprofiler-compute"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-systems/en/docs-6.4.0/index.html">ROCm Systems Profiler</a></td>
|
||||
<td>1.0.0 ⇒ <a href="#rocm-systems-profiler-1-0-1">1.0.1</td>
|
||||
<td><a href="https://github.com/ROCm/rocprofiler-systems"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.0/index.html">ROCProfiler</a></td>
|
||||
<td>2.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCProfiler/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/docs-6.4.0/index.html">ROCprofiler-SDK</a></td>
|
||||
<td>0.6.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocprofiler-sdk/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr >
|
||||
<td><a href="https://rocm.docs.amd.com/projects/roctracer/en/docs-6.4.0/index.html">ROCTracer</a></td>
|
||||
<td>4.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCTracer/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-tools rocm-components-dev">
|
||||
<tr>
|
||||
<th rowspan="5"></th>
|
||||
<th rowspan="5">Development</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIPIFY/en/docs-6.4.0/index.html">HIPIFY</a></td>
|
||||
<td>19.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/HIPIFY/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCdbgapi/en/docs-6.4.0/index.html">ROCdbgapi</a></td>
|
||||
<td>0.77.2</td>
|
||||
<td><a href="https://github.com/ROCm/ROCdbgapi/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCmCMakeBuildTools/en/docs-6.4.0/index.html">ROCm CMake</a></td>
|
||||
<td>0.14.0</td>
|
||||
<td><a href="https://github.com/ROCm/rocm-cmake/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCgdb/en/docs-6.4.0/index.html">ROCm Debugger (ROCgdb)</a>
|
||||
</td>
|
||||
<td>15.2</td>
|
||||
<td><a href="https://github.com/ROCm/ROCgdb/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocr_debug_agent/en/docs-6.4.0/index.html">ROCr Debug Agent</a>
|
||||
</td>
|
||||
<td>2.0.4</td>
|
||||
<td><a href="https://github.com/ROCm/rocr_debug_agent/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-compilers tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="2" colspan="2">Compilers</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIPCC/en/docs-6.4.0/index.html">HIPCC</a></td>
|
||||
<td>1.1.1</td>
|
||||
<td><a href="https://github.com/ROCm/llvm-project/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.0/index.html">llvm-project</a></td>
|
||||
<td>19.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/llvm-project/"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody class="rocm-components-runtimes tbody-reverse-zebra">
|
||||
<tr>
|
||||
<th rowspan="2" colspan="2">Runtimes</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.0/index.html">HIP</a></td>
|
||||
<td>6.4.0 ⇒ <a href="#hip-6-4-1">6.4.1</td>
|
||||
<td><a href="https://github.com/ROCm/HIP/"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCR-Runtime/en/docs-6.4.0/index.html">ROCr Runtime</a></td>
|
||||
<td>1.15.0 ⇒ <a href="#rocr-runtime-1-15-0">1.15.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCR-Runtime/"><i class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
## Detailed component changes
|
||||
|
||||
The following sections describe key changes to ROCm components.
|
||||
|
||||
```{note}
|
||||
For a historical overview of ROCm component updates, see the {doc}`ROCm consolidated changelog </release/changelog>`.
|
||||
```
|
||||
|
||||
### **AMD SMI** (25.4.2)
|
||||
|
||||
#### Added
|
||||
|
||||
* Dumping CPER entries from RAS tool `amdsmi_get_gpu_cper_entries()` to Python and C APIs.
|
||||
- Dumping CPER entries consist of `amdsmi_cper_hdr_t`.
|
||||
- Dumping CPER entries is also enabled in the CLI interface through `sudo amd-smi ras --cper`.
|
||||
|
||||
#### Resolved
|
||||
|
||||
* Fixed partition enumeration in `amd-smi list -e`, `amdsmi_get_gpu_enumeration_info()`, `amdsmi_enumeration_info_t`, `drm_card`, and `drm_render` fields.
|
||||
|
||||
```{note}
|
||||
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **HIP** (6.4.1)
|
||||
|
||||
#### Added
|
||||
|
||||
* New debug mask, to print precise code object information for logging.
|
||||
|
||||
#### Changed
|
||||
|
||||
* Calling the code object has changed. HIP runtime now uses device bitcode before SPIR-V.
|
||||
|
||||
#### Optimized
|
||||
|
||||
* Improved kernel logging using the demangling shader names.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Stale state during the graph capture. The return error was fixed, and HIP runtime now always uses the latest dependent nodes during `hipEventRecord` capture.
|
||||
* Issue of `hipEventRecords` failing to call the `hip::getStream` runtime function.
|
||||
|
||||
### **hipBLASLt** (0.12.1)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an accuracy issue for some solutions using an `FP32` or `TF32` data type with a TT transpose.
|
||||
|
||||
### **RCCL** (2.22.3)
|
||||
|
||||
#### Changed
|
||||
|
||||
* MSCCL++ is now disabled by default. To enable it, set `RCCL_MSCCLPP_ENABLE=1`.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue where early termination, in rare circumstances, could cause the application to stop responding by adding synchronization before destroying a proxy thread.
|
||||
* Fixed the accuracy issue for the MSCCLPP `allreduce7` kernel in graph mode.
|
||||
|
||||
### **rocALUTION** (3.2.3)
|
||||
|
||||
#### Added
|
||||
|
||||
* The `-a` option has been added to the `rmake.py` build script. This option allows you to select specific architectures when building on Microsoft Windows.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue where the `HIP_PATH` environment variable was being ignored when compiling on Microsoft Windows.
|
||||
|
||||
### **ROCm Data Center Tool** (0.3.0)
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for GPU partitions.
|
||||
- `RDC_FI_GPU_BUSY_PERCENT` metric.
|
||||
|
||||
#### Changed
|
||||
|
||||
- Updated `rdc_field` to align with `rdc_bootstrap` for current metrics.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
- Fixed [ROCProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.0/index.html) eval metrics and memory leaks.
|
||||
|
||||
### **ROCm SMI** (7.5.0)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
- Fixed partition enumeration. It now refers to the correct DRM Render and Card paths.
|
||||
|
||||
```{note}
|
||||
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **ROCm Systems Profiler** (1.0.1)
|
||||
|
||||
#### Added
|
||||
|
||||
* How-to document for [network performance profiling](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/amd-staging/how-to/nic-profiling.html) for standard Network Interface Cards (NICs).
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed a build issue with Dyninst on GCC 13.
|
||||
|
||||
### **ROCr Runtime** (1.15.0)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed a rare occurrence issue on AMD Instinct MI25, MI50, and MI100 GPUs, where the `SDMA` copies might start before the dependent Kernel finishes and could cause memory corruption.
|
||||
|
||||
## ROCm known issues
|
||||
|
||||
ROCm known issues are noted on {fab}`github` [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). For known
|
||||
issues related to individual components, review the [Detailed component changes](#detailed-component-changes).
|
||||
|
||||
## ROCm upcoming changes
|
||||
|
||||
The following changes to the ROCm software stack are anticipated for future releases.
|
||||
|
||||
### ROCm SMI deprecation
|
||||
|
||||
[ROCm SMI](https://github.com/ROCm/rocm_smi_lib) will be phased out in an
|
||||
upcoming ROCm release and will enter maintenance mode. After this transition,
|
||||
only critical bug fixes will be addressed and no further feature development
|
||||
will take place.
|
||||
|
||||
It's strongly recommended to transition your projects to [AMD
|
||||
SMI](https://github.com/ROCm/amdsmi), the successor to ROCm SMI. AMD SMI
|
||||
includes all the features of the ROCm SMI and will continue to receive regular
|
||||
updates, new functionality, and ongoing support. For more information on AMD
|
||||
SMI, see the [AMD SMI documentation](https://rocm.docs.amd.com/projects/amdsmi/en/latest/).
|
||||
|
||||
### ROCTracer, ROCProfiler, rocprof, and rocprofv2 deprecation
|
||||
|
||||
Development and support for ROCTracer, ROCProfiler, `rocprof`, and `rocprofv2` are being phased out in favor of ROCprofiler-SDK in upcoming ROCm releases. Starting with ROCm 6.4, only critical defect fixes will be addressed for older versions of the profiling tools and libraries. All users are encouraged to upgrade to the latest version of the ROCprofiler-SDK library and the (`rocprofv3`) tool to ensure continued support and access to new features. ROCprofiler-SDK is still in beta today and will be production-ready in a future ROCm release.
|
||||
|
||||
It's anticipated that ROCTracer, ROCProfiler, `rocprof`, and `rocprofv2` will reach end-of-life by future releases, aligning with Q1 of 2026.
|
||||
|
||||
### AMDGPU wavefront size compiler macro deprecation
|
||||
|
||||
Access to the wavefront size as a compile-time constant via the `__AMDGCN_WAVEFRONT_SIZE`
|
||||
and `__AMDGCN_WAVEFRONT_SIZE__` macros or the `constexpr warpSize` variable is deprecated
|
||||
and will be disabled in a future release.
|
||||
|
||||
* The `__AMDGCN_WAVEFRONT_SIZE__` macro and `__AMDGCN_WAVEFRONT_SIZE` alias will be removed in an upcoming release.
|
||||
It is recommended to remove any use of this macro. For more information, see
|
||||
[AMDGPU support](https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.0/LLVM/clang/html/AMDGPUSupport.html).
|
||||
* `warpSize` will only be available as a non-`constexpr` variable. Where required,
|
||||
the wavefront size should be queried via the `warpSize` variable in device code,
|
||||
or via `hipGetDeviceProperties` in host code. Neither of these will result in a compile-time constant.
|
||||
* For cases where compile-time evaluation of the wavefront size cannot be avoided,
|
||||
uses of `__AMDGCN_WAVEFRONT_SIZE`, `__AMDGCN_WAVEFRONT_SIZE__`, or `warpSize`
|
||||
can be replaced with a user-defined macro or `constexpr` variable with the wavefront
|
||||
size(s) for the target hardware. For example:
|
||||
|
||||
```
|
||||
#if defined(__GFX9__)
|
||||
#define MY_MACRO_FOR_WAVEFRONT_SIZE 64
|
||||
#else
|
||||
#define MY_MACRO_FOR_WAVEFRONT_SIZE 32
|
||||
#endif
|
||||
```
|
||||
|
||||
### HIPCC Perl scripts deprecation
|
||||
|
||||
The HIPCC Perl scripts (`hipcc.pl` and `hipconfig.pl`) will be removed in an upcoming release.
|
||||
|
||||
### Changes to ROCm Object Tooling
|
||||
|
||||
ROCm Object Tooling tools ``roc-obj-ls``, ``roc-obj-extract``, and ``roc-obj`` are
|
||||
deprecated in ROCm 6.4, and will be removed in a future release. Functionality
|
||||
has been added to the ``llvm-objdump --offloading`` tool option to extract all
|
||||
clang-offload-bundles into individual code objects found within the objects
|
||||
or executables passed as input. The ``llvm-objdump --offloading`` tool option also
|
||||
supports the ``--arch-name`` option, and only extracts code objects found with
|
||||
the specified target architecture. See [llvm-objdump](https://llvm.org/docs/CommandGuide/llvm-objdump.html)
|
||||
for more information.
|
||||
|
||||
### HIP runtime API changes
|
||||
|
||||
There are a number of upcoming changes planned for HIP runtime API in an upcoming major release
|
||||
that are not backward compatible with prior releases. Most of these changes increase
|
||||
alignment between HIP and CUDA APIs or behavior. Some of the upcoming changes are to
|
||||
clean up header files, remove namespace collision, and have a clear separation between
|
||||
`hipRTC` and HIP runtime. For more information refer to [HIP Upcoming changes](https://rocm.docs.amd.com/en/latest/about/release-notes.html#id15).
|
||||
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.4.0"
|
||||
<default revision="refs/tags/rocm-6.4.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="ROCm licensing terms">
|
||||
<meta name="keywords" content="license, licensing terms">
|
||||
</head>
|
||||
|
||||
# ROCm license
|
||||
|
||||
```{include} ../../LICENSE
|
||||
```
|
||||
|
||||
:::{note}
|
||||
The preceding license applies to the [ROCm repository](https://github.com/ROCm/ROCm), which
|
||||
primarily contains documentation. For licenses related to other ROCm components, refer to the
|
||||
following section.
|
||||
:::
|
||||
|
||||
## ROCm component licenses
|
||||
|
||||
ROCm is released by Advanced Micro Devices, Inc. (AMD) and is licensed per component separately.
|
||||
The following table is a list of ROCm components with links to their respective license
|
||||
terms. These components may include third party components subject to
|
||||
additional licenses. Please review individual repositories for more information.
|
||||
|
||||
<!-- spellcheck-disable -->
|
||||
| Component | License |
|
||||
|:---------------------|:-------------------------|
|
||||
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/LICENSE.txt) |
|
||||
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
|
||||
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
|
||||
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
|
||||
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
|
||||
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
|
||||
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
|
||||
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipamd](https://github.com/ROCm/clr/tree/amd-staging/hipamd) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/hipamd/LICENSE.txt) |
|
||||
| [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
|
||||
| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
|
||||
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
|
||||
| [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
|
||||
| [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
|
||||
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
|
||||
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
|
||||
| [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
|
||||
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
|
||||
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |
|
||||
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
|
||||
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
|
||||
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
|
||||
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
|
||||
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
|
||||
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
|
||||
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
|
||||
| [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
|
||||
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
|
||||
| [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
|
||||
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v3.0](https://github.com/ROCm/ROCgdb/blob/amd-staging/COPYING3) |
|
||||
| [rocJPEG](https://github.com/ROCm/rocJPEG/) | [MIT](https://github.com/ROCm/rocJPEG/blob/develop/LICENSE) |
|
||||
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
|
||||
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
|
||||
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [MIT](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
|
||||
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
|
||||
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
|
||||
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
|
||||
| [ROCm Compute Profiler](https://github.com/ROCm/rocprofiler-compute) | [MIT](https://github.com/ROCm/rocprofiler-compute/blob/amd-staging/LICENSE) |
|
||||
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/amd-staging/LICENSE) |
|
||||
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
|
||||
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/amd-staging/opencl) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/opencl/LICENSE.txt) |
|
||||
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
|
||||
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/amd-staging/License.txt) |
|
||||
| [ROCm Systems Profiler](https://github.com/ROCm/rocprofiler-systems) | [MIT](https://github.com/ROCm/rocprofiler-systems/blob/amd-staging/LICENSE) |
|
||||
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
|
||||
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
|
||||
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-staging/LICENSE) |
|
||||
| [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) | [MIT](https://github.com/ROCm/rocprofiler-sdk/blob/amd-mainline/LICENSE) |
|
||||
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE.txt) |
|
||||
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
|
||||
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
|
||||
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
|
||||
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
|
||||
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
|
||||
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
|
||||
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
|
||||
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com).
|
||||
Proprietary components are organized in a proprietary subdirectory in the package
|
||||
repositories to distinguish from open sourced packages.
|
||||
|
||||
```{note}
|
||||
The following additional terms and conditions apply to your use of ROCm technical documentation.
|
||||
```
|
||||
|
||||
©2023 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
The information presented in this document is for informational purposes only
|
||||
and may contain technical inaccuracies, omissions, and typographical errors. The
|
||||
information contained herein is subject to change and may be rendered inaccurate
|
||||
for many reasons, including but not limited to product and roadmap changes,
|
||||
component and motherboard version changes, new model and/or product releases,
|
||||
product differences between differing manufacturers, software changes, BIOS
|
||||
flashes, firmware upgrades, or the like. Any computer system has risks of
|
||||
security vulnerabilities that cannot be completely prevented or mitigated. AMD
|
||||
assumes no obligation to update or otherwise correct or revise this information.
|
||||
However, AMD reserves the right to revise this information and to make changes
|
||||
from time to time to the content hereof without obligation of AMD to notify any
|
||||
person of such revisions or changes.
|
||||
|
||||
THIS INFORMATION IS PROVIDED “AS IS.” AMD MAKES NO REPRESENTATIONS OR WARRANTIES
|
||||
WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY
|
||||
INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS INFORMATION. AMD
|
||||
SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF NON-INFRINGEMENT,
|
||||
MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. IN NO EVENT WILL AMD BE
|
||||
LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, INDIRECT, SPECIAL, OR OTHER
|
||||
CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY INFORMATION CONTAINED HEREIN,
|
||||
EVEN IF AMD IS EXPRESSLY ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
AMD, the AMD Arrow logo, ROCm, and combinations thereof are trademarks of
|
||||
Advanced Micro Devices, Inc. Other product names used in this publication are
|
||||
for identification purposes only and may be trademarks of their respective
|
||||
companies.
|
||||
|
||||
### Package licensing
|
||||
|
||||
:::{attention}
|
||||
AQL Profiler and AOCC CPU optimization are both provided in binary form, each
|
||||
subject to the license agreement enclosed in the directory for the binary available
|
||||
in `/opt/rocm/share/doc/hsa-amd-aqlprofile/EULA`. By using, installing,
|
||||
copying or distributing AQL Profiler and/or AOCC CPU Optimizations, you agree to
|
||||
the terms and conditions of this license agreement. If you do not agree to the
|
||||
terms of this agreement, do not install, copy or use the AQL Profiler and/or the
|
||||
AOCC CPU Optimizations.
|
||||
:::
|
||||
|
||||
For the rest of the ROCm packages, you can find the licensing information at the
|
||||
following location: `/opt/rocm/share/doc/<component-name>/` or in the locations
|
||||
specified in the preceding table.
|
||||
|
||||
For example, you can fetch the licensing information of the `amd_comgr`
|
||||
component (Code Object Manager) from the `/opt/rocm/share/doc/amd_comgr/LICENSE.txt` file.
|
||||
55
docs/benchmark-docker/index.rst
Normal file
55
docs/benchmark-docker/index.rst
Normal file
@@ -0,0 +1,55 @@
|
||||
***************************************************
|
||||
AI training and inference performance with ROCm 7.0
|
||||
***************************************************
|
||||
|
||||
AMD ROCm is an open-source software platform optimized to extract HPC and AI
|
||||
workload performance from AMD Instinct™ accelerators and GPUs while maintaining
|
||||
compatibility with industry software frameworks.
|
||||
|
||||
.. note::
|
||||
|
||||
ROCm 7.0 is now available. See the documentation at `ROCm 7.0 documentation
|
||||
<https://rocm.docs.amd.com/en/docs-7.0.0/>`__.
|
||||
|
||||
This documentation accompanies preview Docker images designed to reproduce
|
||||
training and inference performance on AMD Instinct™ MI355X, MI350X, and MI300X
|
||||
series accelerators with ROCm 7.0. The images provide the 7.0 release of the
|
||||
ROCm software stack and are targeted at users evaluating AI inference workloads
|
||||
using next-generation AMD accelerators. See the Docker image repository at
|
||||
`rocm/7.0 <https://hub.docker.com/r/rocm/7.0/>`__.
|
||||
|
||||
.. important::
|
||||
|
||||
The following AI workload benchmarks use ROCm 7.0 on AMD Instinct MI355X,
|
||||
MI350X, and MI300X series accelerators.
|
||||
|
||||
For other workloads for MI300X series accelerators, see
|
||||
`Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html>`_.
|
||||
|
||||
.. grid:: 2
|
||||
|
||||
.. grid-item-card:: Training
|
||||
|
||||
* :doc:`training-maxtext-llama-3`
|
||||
|
||||
* :doc:`training-maxtext-mixtral-8x7b`
|
||||
|
||||
* :doc:`training-megatron-lm-llama-3`
|
||||
|
||||
* :doc:`training-mlperf-fine-tuning-llama-2-70b`
|
||||
|
||||
* :doc:`training-torchtitan-llama-3`
|
||||
|
||||
.. grid-item-card:: Inference
|
||||
|
||||
* :doc:`inference-vllm-llama-3.1-405b-fp4`
|
||||
|
||||
* :doc:`inference-vllm-llama-3.3-70b-fp8`
|
||||
|
||||
* :doc:`inference-vllm-deepseek-r1-fp8`
|
||||
|
||||
* :doc:`inference-vllm-gpt-oss-120b`
|
||||
|
||||
* :doc:`inference-sglang-deepseek-r1-fp4`
|
||||
|
||||
* :doc:`inference-sglang-deepseek-r1-fp8`
|
||||
99
docs/benchmark-docker/inference-sglang-deepseek-r1-fp4.rst
Normal file
99
docs/benchmark-docker/inference-sglang-deepseek-r1-fp4.rst
Normal file
@@ -0,0 +1,99 @@
|
||||
***********************************************
|
||||
Benchmark DeepSeek R1 FP4 inference with SGLang
|
||||
***********************************************
|
||||
|
||||
This section provides instructions to test the inference performance of
|
||||
DeepSeek R1 with FP4 precision via the SGLang serving framework. The
|
||||
accompanying Docker image integrates `ROCm 7.0
|
||||
<https://rocm.docs.amd.com/en/latest/>`__ with SGLang, and is supported only on AMD
|
||||
Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the benchmark.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image
|
||||
<https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
See the model card on Hugging Face at `DeepSeek-R1-MXFP4-Preview
|
||||
<https://huggingface.co/amd/DeepSeek-R1-MXFP4-Preview>`__. This model uses
|
||||
microscaling 4-bit floating point (MXFP4) quantization through `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \
|
||||
huggingface-cli download amd/DeepSeek-R1-0528-MXFP4-Preview --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--user root \
|
||||
--group-add video \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-w /app/ \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--shm-size 64G \
|
||||
--mount type=bind,src=/data,dst=/data \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
-e SGLANG_USE_AITER=1 \
|
||||
rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
python3 -m sglang.launch_server \
|
||||
--model-path amd/DeepSeek-R1-0528-MXFP4-Preview \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--tensor-parallel-size 8 \
|
||||
--trust-remote-code \
|
||||
--chunked-prefill-size 196608 \
|
||||
--mem-fraction-static 0.8 \
|
||||
--disable-radix-cache \
|
||||
--num-continuous-decode-steps 4 \
|
||||
--max-prefill-tokens 196608 \
|
||||
--cuda-graph-max-bs 128 &
|
||||
|
||||
3. Run the benchmark with the following options.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=64
|
||||
num_prompts=128
|
||||
|
||||
python3 -m sglang.bench_serving \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--model amd/DeepSeek-R1-0528-MXFP4-Preview \
|
||||
--dataset-name random \
|
||||
--random-input ${input_tokens} \
|
||||
--random-output ${output_tokens} \
|
||||
--random-range-ratio 1.0 \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompt ${num_prompts}
|
||||
|
||||
132
docs/benchmark-docker/inference-sglang-deepseek-r1-fp8.rst
Normal file
132
docs/benchmark-docker/inference-sglang-deepseek-r1-fp8.rst
Normal file
@@ -0,0 +1,132 @@
|
||||
***********************************************
|
||||
Benchmark DeepSeek R1 FP8 inference with SGLang
|
||||
***********************************************
|
||||
|
||||
This section provides instructions to test the inference performance of DeepSeek R1
|
||||
with FP8 precision via the SGLang serving framework.
|
||||
The accompanying Docker image integrates ROCm 7.0 with SGLang, and is
|
||||
supported on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the benchmark.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the appropriate `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__
|
||||
for your system.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MI355X and MI350X
|
||||
:sync: mi35x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
.. tab-item:: MI300X series
|
||||
:sync: mi30x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi30x-20250915
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
See the model card on Hugging Face at `deepseek-ai/DeepSeek-R1-0528
|
||||
<https://huggingface.co/deepseek-ai/DeepSeek-R1-0528>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \
|
||||
huggingface-cli download deepseek-ai/DeepSeek-R1-0528 --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MI355X and MI350X
|
||||
:sync: mi35x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--user root \
|
||||
--group-add video \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-w /app/ \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--shm-size 64G \
|
||||
--mount type=bind,src=/data,dst=/data \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
-e SGLANG_USE_AITER=1 \
|
||||
rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
|
||||
|
||||
.. tab-item:: MI300X series
|
||||
:sync: mi30x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--user root \
|
||||
--group-add video \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-w /app/ \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--shm-size 64G \
|
||||
--mount type=bind,src=/data,dst=/data \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
-e SGLANG_USE_AITER=1 \
|
||||
rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi30x-20250915
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
python3 -m sglang.launch_server \
|
||||
--model-path deepseek-ai/DeepSeek-R1-0528 \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--tensor-parallel-size 8 \
|
||||
--trust-remote-code \
|
||||
--chunked-prefill-size 196608 \
|
||||
--mem-fraction-static 0.8 \
|
||||
--disable-radix-cache \
|
||||
--num-continuous-decode-steps 4 \
|
||||
--max-prefill-tokens 196608 \
|
||||
--cuda-graph-max-bs 128 &
|
||||
|
||||
3. Run the benchmark with the following options.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=64
|
||||
num_prompts=128
|
||||
|
||||
python3 -m sglang.bench_serving \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--model deepseek-ai/DeepSeek-R1-0528 \
|
||||
--dataset-name random \
|
||||
--random-input ${input_tokens} \
|
||||
--random-output ${output_tokens} \
|
||||
--random-range-ratio 1.0 \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompt ${num_prompts}
|
||||
|
||||
122
docs/benchmark-docker/inference-vllm-deepseek-r1-fp8.rst
Normal file
122
docs/benchmark-docker/inference-vllm-deepseek-r1-fp8.rst
Normal file
@@ -0,0 +1,122 @@
|
||||
************************************************
|
||||
Benchmark DeepSeek R1 FP8 inference with vLLM
|
||||
************************************************
|
||||
|
||||
This section provides instructions to test the inference performance of DeepSeek R1
|
||||
with FP8 precision on the vLLM inference engine. The provided Docker image integrates
|
||||
`ROCm 7.0 <https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with vLLM.
|
||||
This benchmark supports AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the benchmark.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`deepseek-ai/DeepSeek-R1-0528 <https://huggingface.co/deepseek-ai/DeepSeek-R1-0528>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=deepseek-ai/DeepSeek-R1-0528
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=deepseek-ai/DeepSeek-R1-0528
|
||||
max_model_len=16384 # Must be >= the input + the output lengths.
|
||||
max_seq_len_to_capture=10240 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024
|
||||
max_num_batched_tokens=131072 # Smaller values may result in better TTFT but worse TPOT / throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
# Note: this flag may not be compatible with MI325X GPUs
|
||||
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
# Note: Using `--kv-cache-dtype fp8` with DeepSeek may cause accuracy issues
|
||||
vllm serve ${model} \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--no-enable-prefix-caching \
|
||||
--max-num-batched-tokens ${max_num_batched_tokens} \
|
||||
--max-model-len ${max_model_len} \
|
||||
--block-size 1 \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
# Run the client benchmark
|
||||
input_tokens=8192
|
||||
output_tokens=1024
|
||||
max_concurrency=4
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
163
docs/benchmark-docker/inference-vllm-gpt-oss-120b.rst
Normal file
163
docs/benchmark-docker/inference-vllm-gpt-oss-120b.rst
Normal file
@@ -0,0 +1,163 @@
|
||||
**********************************************
|
||||
Benchmark GPT OSS 120B inference with vLLM
|
||||
**********************************************
|
||||
|
||||
This section provides instructions to test the inference performance of OpenAI
|
||||
GPT OSS 120B on the vLLM inference engine. The provided Docker
|
||||
image integrates `ROCm 7.0
|
||||
<https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with
|
||||
vLLM. This benchmark supports AMD Instinct MI355X, MI350X, MI325X, and MI300X
|
||||
GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`openai/gpt-oss-120b <https://huggingface.co/openai/gpt-oss-120b>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=openai/gpt-oss-120b
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MI355X and MI350X
|
||||
:sync: mi35x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=openai/gpt-oss-120b
|
||||
max_model_len=10368 # 1.125 x (input sequence length + output sequence length); e.g. 1.125 x (8192 + 1024) = 10368.
|
||||
max_seq_len_to_capture=10368 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024 # Set to max_concurrency of the client to get better throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
export VLLM_ROCM_USE_AITER_FUSED_MOE_A16W4=1
|
||||
|
||||
vllm serve ${model} \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--compilation-config '{"compile_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,256,512,1024,2048,8192] , "cudagraph_capture_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,136,144,152,160,168,176,184,192,200,208,216,224,232,240,248,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,520,528,536,544,552,560,568,576,584,592,600,608,616,624,632,640,648,656,664,672,680,688,696,704,712,720,728,736,744,752,760,768,776,784,792,800,808,816,824,832,840,848,856,864,872,880,888,896,904,912,920,928,936,944,952,960,968,976,984,992,1000,1008,1016,1024,2048,4096,8192] , "cudagraph_mode": "FULL_AND_PIECEWISE"}' \
|
||||
--block-size=64 \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
.. tab-item:: MI325X and MI300X
|
||||
:sync: mi30x
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=openai/gpt-oss-120b
|
||||
max_model_len=10368 # 1.125 x (input sequence length + output sequence length); e.g. 1.125 x (8192 + 1024) = 10368.
|
||||
max_seq_len_to_capture=10368 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024 # Set to max_concurrency of the client to get better throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
|
||||
|
||||
# Set this flag for MI300X only; it is not yet compatible with MI325X.
|
||||
# export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
vllm serve ${model} \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--compilation-config '{"cudagraph_mode": "FULL_AND_PIECEWISE"}' \
|
||||
--block-size=64 \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Run the client benchmark
|
||||
model=openai/gpt-oss-120b
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=4
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
193
docs/benchmark-docker/inference-vllm-llama-3.1-405b-fp4.rst
Normal file
193
docs/benchmark-docker/inference-vllm-llama-3.1-405b-fp4.rst
Normal file
@@ -0,0 +1,193 @@
|
||||
************************************************
|
||||
Benchmark Llama 3.3/3.1 FP4 inference with vLLM
|
||||
************************************************
|
||||
|
||||
This section provides instructions to test the inference performance of Llama
|
||||
3.3 70B and Llama 3.1 405B with MXFP4 precision on the vLLM inference engine.
|
||||
The provided Docker image integrates `ROCm 7.0
|
||||
<https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with vLLM.
|
||||
This benchmark supports AMD Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B MXFP4
|
||||
:sync: Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.3-70B-Instruct-MXFP4-Preview <https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview>`__.
|
||||
This model uses FP4 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
.. tab-item:: Llama 3.1 405B MXFP4
|
||||
:sync: Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.1-405B-Instruct-MXFP4-Preview <https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview>`__.
|
||||
This model uses FP4 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B MXFP4
|
||||
:sync: Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. tab-item:: Llama 3.1 405B MXFP4
|
||||
:sync: Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
max_model_len=10240 # Must be >= the input + the output lengths.
|
||||
max_seq_len_to_capture=10240 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024
|
||||
max_num_batched_tokens=131072 # Smaller values may result in better TTFT but worse TPOT / throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
# The following setting is recommended for most configurations:
|
||||
export VLLM_TRITON_FP4_GEMM_USE_ASM=1
|
||||
|
||||
# For tensor parallelism >1 at low concurrency (<= 16 for input length 1024, <= 4 for input length 8192),
|
||||
# uncomment these lines:
|
||||
# export VLLM_TRITON_FP4_GEMM_USE_ASM=0
|
||||
# export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
|
||||
|
||||
# 0 is recommended for most configurations.
|
||||
# 1 (the default) is faster for input lengths of 8192 with concurrency > 16.
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
|
||||
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
vllm serve ${model} \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--kv-cache-dtype fp8 \
|
||||
--gpu-memory-utilization 0.94 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--max-num-batched-tokens ${max_num_batched_tokens} \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B MXFP4
|
||||
:sync: Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
|
||||
.. tab-item:: Llama 3.1 405B MXFP4
|
||||
:sync: Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Run the client benchmark
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=64
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
189
docs/benchmark-docker/inference-vllm-llama-3.3-70b-fp8.rst
Normal file
189
docs/benchmark-docker/inference-vllm-llama-3.3-70b-fp8.rst
Normal file
@@ -0,0 +1,189 @@
|
||||
************************************************
|
||||
Benchmark Llama 3.3/3.1 FP8 inference with vLLM
|
||||
************************************************
|
||||
|
||||
This section provides instructions to test the inference performance of Llama
|
||||
3.3 70B and Llama 3.1 405B with FP8 precision on the vLLM inference engine. The provided Docker image integrates
|
||||
`ROCm 7.0 <https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html>`__ with vLLM.
|
||||
This benchmark supports AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.x-preview/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
Download the model
|
||||
==================
|
||||
|
||||
While vLLM can download model weights at runtime, it's recommended to
|
||||
download ahead of time. You will need:
|
||||
|
||||
* A valid `Hugging Face access token <https://huggingface.co/docs/hub/security-tokens>`__.
|
||||
Remember to set ``HF_TOKEN`` to your access token.
|
||||
|
||||
* Access granted to the specific model from your Hugging Face account
|
||||
|
||||
In the following snippet, set ``HF_TOKEN`` to your access token.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B FP8
|
||||
:sync: Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.3-70B-Instruct-FP8-KV <https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV>`__.
|
||||
This model uses FP8 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
.. tab-item:: Llama 3.1 405B FP8
|
||||
:sync: Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
See the model card on Hugging Face at
|
||||
`amd/Llama-3.1-405B-Instruct-FP8-KV <https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV>`__.
|
||||
This model uses FP8 quantization via `AMD Quark
|
||||
<https://quark.docs.amd.com/latest/>`_ for efficient inference on AMD
|
||||
accelerators.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
pip install huggingface_hub[cli] hf_transfer hf_xet
|
||||
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
||||
HF_HOME=/data/huggingface-cache \
|
||||
HF_TOKEN="<HF_TOKEN>" \ # Replace with your HF_TOKEN Hugging Face access token.
|
||||
huggingface-cli download ${model} --exclude "original/*"
|
||||
|
||||
Run the inference benchmark
|
||||
===========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--ipc=host \
|
||||
--network=host \
|
||||
--privileged \
|
||||
--cap-add=CAP_SYS_ADMIN \
|
||||
--device=/dev/kfd \
|
||||
--device=/dev/dri \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
-v /data:/data \
|
||||
-e HF_HOME=/data/huggingface-cache \
|
||||
-e HF_HUB_OFFLINE=1 \
|
||||
--name vllm-server \
|
||||
rocm/7.x-preview:rocm7.2_preview_ubuntu_22.04_vlm_0.10.1_instinct_20251029
|
||||
|
||||
2. Start the server. On MI300X and MI325X GPUs, include ``--dtype float16`` in your ``vllm serve`` arguments.
|
||||
This is not necessary on MI350X and MI355X GPUs.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B FP8
|
||||
:sync: Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. tab-item:: Llama 3.1 405B FP8
|
||||
:sync: Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
max_model_len=10240 # Must be >= the input + the output lengths.
|
||||
max_seq_len_to_capture=10240 # Beneficial to set this to max_model_len.
|
||||
max_num_seqs=1024
|
||||
max_num_batched_tokens=131072 # Smaller values may result in better TTFT but worse TPOT / throughput.
|
||||
tensor_parallel_size=8
|
||||
|
||||
# Note: this flag may not be compatible with MI325X GPUs
|
||||
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
|
||||
|
||||
# 0 is recommended for most configurations.
|
||||
# 1 (the default) is faster for input lengths of 8192 with concurrency > 16.
|
||||
export VLLM_ROCM_USE_AITER_MHA=0
|
||||
|
||||
vllm serve ${model} \
|
||||
--host localhost \
|
||||
--port 8000 \
|
||||
--swap-space 64 \
|
||||
--max-model-len ${max_model_len} \
|
||||
--tensor-parallel-size ${tensor_parallel_size} \
|
||||
--max-num-seqs ${max_num_seqs} \
|
||||
--distributed-executor-backend mp \
|
||||
--kv-cache-dtype fp8 \
|
||||
--gpu-memory-utilization 0.94 \
|
||||
--max-seq-len-to-capture ${max_seq_len_to_capture} \
|
||||
--max-num-batched-tokens ${max_num_batched_tokens} \
|
||||
--no-enable-prefix-caching \
|
||||
--async-scheduling
|
||||
|
||||
# Wait for model to load and server is ready to accept requests.
|
||||
|
||||
3. Open another terminal on the same machine, connect to your running
|
||||
``vllm-server`` container, and run the benchmark with the appropriate
|
||||
options. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Connect to server
|
||||
docker exec -it vllm-server bash
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3.3 70B FP8
|
||||
:sync: Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
|
||||
.. tab-item:: Llama 3.1 405B FP8
|
||||
:sync: Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
model=amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Run the client benchmark
|
||||
input_tokens=1024
|
||||
output_tokens=1024
|
||||
max_concurrency=4
|
||||
num_prompts=32
|
||||
|
||||
python3 /app/vllm/benchmarks/benchmark_serving.py --host localhost --port 8000 \
|
||||
--model ${model} \
|
||||
--dataset-name random \
|
||||
--random-input-len ${input_tokens} \
|
||||
--random-output-len ${output_tokens} \
|
||||
--max-concurrency ${max_concurrency} \
|
||||
--num-prompts ${num_prompts} \
|
||||
--percentile-metrics ttft,tpot,itl,e2el \
|
||||
--ignore-eos
|
||||
122
docs/benchmark-docker/training-maxtext-llama-3.rst
Normal file
122
docs/benchmark-docker/training-maxtext-llama-3.rst
Normal file
@@ -0,0 +1,122 @@
|
||||
*******************************************
|
||||
Benchmark Llama 3 pre-training with MaxText
|
||||
*******************************************
|
||||
|
||||
This page describes how to benchmark Llama 3 8B and 70B pre-training using the
|
||||
MaxText framework. It includes configurations for both
|
||||
FP8 and BF16 precision to measure throughput. The provided Docker
|
||||
image integrates ROCm 7.0 with MaxText
|
||||
-- and is supported on AMD Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/maxtext \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b_env.sh
|
||||
bash llama3_8b_env.sh
|
||||
python3 -m MaxText.train llama3_8b.yml
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_8b_env.sh
|
||||
bash llama3_8b_env.sh
|
||||
python3 -m MaxText.train llama3_8b.yml quantization=fp8
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_70b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/llama3_70b_env.sh
|
||||
bash llama3_70b_env.sh
|
||||
python3 -m MaxText.train llama3_70b.yml
|
||||
|
||||
.. rubric:: Options
|
||||
|
||||
The ``MaxText.train`` script accepts the following options:
|
||||
|
||||
* ``per_device_batch_size``: Per-device batch size
|
||||
|
||||
* ``quantization``: quantization
|
||||
|
||||
* ``max_target_length``: Maximum input token sequence length
|
||||
|
||||
* ``steps``: Number of training iterations to execute
|
||||
|
||||
See this base [config](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/configs/base.yml)
|
||||
for the full list of settings you can change.
|
||||
|
||||
Other supported models
|
||||
======================
|
||||
|
||||
* Llama-2-7B FP8 and BF16
|
||||
|
||||
* Llama-2-70B FP8 and BF16
|
||||
|
||||
* Llama-3.3-70B BF16
|
||||
|
||||
* DeepSeek-V2-Lite FP8 and BF16
|
||||
|
||||
* Mixtral 8x7B FP8 and BF16
|
||||
|
||||
Known issue
|
||||
===========
|
||||
|
||||
Some models and configurations may trigger a "Memory Access Fault" error.
|
||||
Updates to improve stability are planned for upcoming releases.
|
||||
111
docs/benchmark-docker/training-maxtext-mixtral-8x7b.rst
Normal file
111
docs/benchmark-docker/training-maxtext-mixtral-8x7b.rst
Normal file
@@ -0,0 +1,111 @@
|
||||
************************************************
|
||||
Benchmark Mixtral 8x7B pre-training with MaxText
|
||||
************************************************
|
||||
|
||||
This page describes how to benchmark the Mixtral 8x7B pre-training using the
|
||||
MaxText framework. It includes configurations for both
|
||||
FP8 and BF16 precision to measure throughput. The provided Docker
|
||||
image integrates a ROCm 7.0 with MaxText
|
||||
-- and is supported on AMD Instinct MI355X and MI350X GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/maxtext \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_jax_training_mi35x_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Mixtral 8x7B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b_env.sh
|
||||
bash mixtral_8x7b_env.sh
|
||||
python3 -m MaxText.train mixtral_8x7b.yml
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b.yml
|
||||
wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/env_scripts/mixtral_8x7b_env.sh
|
||||
bash mixtral_8x7b_env.sh
|
||||
python3 -m MaxText.train mixtral_8x7b.yml quantization=fp8
|
||||
|
||||
.. rubric:: Options
|
||||
|
||||
The ``MaxText.train`` script accepts the following options:
|
||||
|
||||
* ``per_device_batch_size``: Per-device batch size
|
||||
|
||||
* ``quantization``: quantization
|
||||
|
||||
* ``max_target_length``: Maximum input token sequence length
|
||||
|
||||
* ``steps``: Number of training iterations to execute
|
||||
|
||||
See this base [config](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/configs/base.yml)
|
||||
for the full list of settings you can change.
|
||||
|
||||
Other supported models
|
||||
======================
|
||||
|
||||
* Llama-2-7B FP8 and BF16
|
||||
|
||||
* Llama-2-70B FP8 and BF16
|
||||
|
||||
* Llama-3.1-8B FP8 and BF16
|
||||
|
||||
* Llama-3.1-70B BF16
|
||||
|
||||
* Llama-3.3-70B BF16
|
||||
|
||||
* DeepSeek-V2-Lite FP8 and BF16
|
||||
|
||||
Known issue
|
||||
===========
|
||||
|
||||
Some models and configurations may trigger a "Memory Access Fault" error.
|
||||
Updates to improve stability are planned for upcoming releases.
|
||||
171
docs/benchmark-docker/training-megatron-lm-llama-3.rst
Normal file
171
docs/benchmark-docker/training-megatron-lm-llama-3.rst
Normal file
@@ -0,0 +1,171 @@
|
||||
***********************************************
|
||||
Benchmark Llama 3 pre-training with Megatron-LM
|
||||
***********************************************
|
||||
|
||||
This page describes how to benchmark Llama 3 8B and 70B pre-training using the
|
||||
Megatron-LM framework. It includes configurations for both FP8 and BF16
|
||||
precision to measure throughput. The provided Docker image integrates ROCm
|
||||
7.0 with Megatron-LM -- and is supported on AMD Instinct MI355X and MI350X
|
||||
GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/Megatron-LM \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
TEE_OUTPUT=1 \
|
||||
MBS=4 \
|
||||
BS=512 \
|
||||
TP=1 \
|
||||
TE_FP8=0 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=8 \
|
||||
TOTAL_ITERS=10 \
|
||||
GEMM_TUNING=1 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
TEE_OUTPUT=1 \
|
||||
MBS=4 \
|
||||
BS=512 \
|
||||
TP=1 \
|
||||
TE_FP8=1 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=8 \
|
||||
TOTAL_ITERS=10 \
|
||||
GEMM_TUNING=0 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CKPT_FORMAT=torch_dist \
|
||||
TEE_OUTPUT=1 \
|
||||
MBS=3 \
|
||||
BS=24 \
|
||||
TP=1 \
|
||||
TE_FP8=0 \
|
||||
FSDP=1 \
|
||||
RECOMPUTE=1 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=70 \
|
||||
TOTAL_ITERS=10 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CKPT_FORMAT=torch_dist \
|
||||
TEE_OUTPUT=1 \
|
||||
RECOMPUTE=1 \
|
||||
MBS=3 \
|
||||
BS=24 \
|
||||
TP=1 \
|
||||
TE_FP8=1 \
|
||||
SEQ_LENGTH=8192 \
|
||||
MODEL_SIZE=70 \
|
||||
FSDP=1 \
|
||||
TOTAL_ITERS=10 \
|
||||
NUM_LAYERS=40 \
|
||||
bash examples/llama/train_llama3.sh
|
||||
|
||||
.. rubric:: Options
|
||||
|
||||
The ``train_llama3.sh`` script accepts the following options:
|
||||
|
||||
* ``MBS``: Micro-batch size per GPU
|
||||
|
||||
* ``BS``: Global batch size
|
||||
|
||||
* ``TP``: Tensor parallelism
|
||||
|
||||
* ``SEQ_LENGTH``: Maximum input token sequence length
|
||||
|
||||
* ``TE_FP8``: Toggle to enable FP8
|
||||
|
||||
* ``TOTAL_ITERS``: Number of training iterations to execute
|
||||
|
||||
Other supported models
|
||||
======================
|
||||
|
||||
* Llama-2-7B
|
||||
|
||||
* Llama-3.3-70B
|
||||
|
||||
* DeepSeek-V2-Lite
|
||||
|
||||
* Mixtral-8x7B
|
||||
|
||||
* Qwen-2.5-7B
|
||||
|
||||
* Qwen-2.5-72B
|
||||
|
||||
Known issues
|
||||
============
|
||||
|
||||
- Some models and configurations may trigger a "Memory Access Fault" error.
|
||||
Updates to improve stability are planned for upcoming releases.
|
||||
|
||||
- A regression related to Composable Kernel (CK) may cause errors in certain
|
||||
cases. A fix is planned for an upcoming release.
|
||||
|
||||
- Flash Attention forward may not pick the best performing ASM kernel with batch
|
||||
size greater than 1 and impact model performance.
|
||||
For best performance on MI355X and MI350X GPUs, use the ROCm 7.0 Preview beta Docker
|
||||
(``rocm/7.0-preview:rocm7.0_preview_pytorch_training_mi35x_beta``).
|
||||
@@ -0,0 +1,151 @@
|
||||
**************************************************
|
||||
Benchmark Llama 2 70B LoRA fine-tuning with MLPerf
|
||||
**************************************************
|
||||
|
||||
This guide provides instructions to benchmark LoRA fine-tuning on the Llama 2
|
||||
70B model. The benchmark follows the MLPerf training submission for
|
||||
long-document summarization using the GovReport dataset.
|
||||
|
||||
The provided Docker image integrates the `ROCm 7.0 <https://rocm.docs.amd.com/en/latest/>`__
|
||||
software stack and is optimized for AMD Instinct MI355X, MI350X, MI325X, and MI300X
|
||||
GPUs.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
1. Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915
|
||||
|
||||
2. Copy the benchmark scripts from the container to your host. These scripts
|
||||
are used to configure the environment and launch the benchmark.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
container_id=$(docker create rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915) && \
|
||||
docker cp $container_id:/workspace/code/runtime_tunables.sh . && \
|
||||
docker cp $container_id:/workspace/code/run_with_docker.sh . && \
|
||||
docker cp $container_id:/workspace/code/config_MI355X_1x8x1.sh . && \
|
||||
docker rm $container_id
|
||||
|
||||
.. note::
|
||||
|
||||
The ``config_*.sh`` files contain system-specific hyperparameters used in
|
||||
the :ref:`run step <system-config>`. You will need to copy the one that
|
||||
matches your hardware configuration.
|
||||
|
||||
Prepare the GovReport dataset
|
||||
=============================
|
||||
|
||||
This benchmark uses the Llama 2 70B model with fused QKV and the GovReport dataset.
|
||||
GovReport is a dataset for long document summarization that consists of
|
||||
reports written by government research agencies. The dataset hosted on the
|
||||
MLPerf drive is already tokenized and packed so that each sequence has
|
||||
length 8192.
|
||||
|
||||
1. Download and preprocess the dataset.
|
||||
|
||||
Start the Docker container by mounting the volume you want to use for
|
||||
downloading the data under ``/data`` within the container. This example uses
|
||||
``/data/mlperf_llama2`` as the host's download directory:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--net=host \
|
||||
--uts=host \
|
||||
--ipc=host \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--privileged \
|
||||
--security-opt=seccomp=unconfined \
|
||||
--volume=/data/mlperf_llama2:/data \
|
||||
--volume=/data/mlperf_llama2/model:/ckpt \
|
||||
rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915
|
||||
|
||||
2. From within the container, run the preparation script. This will download and
|
||||
preprocess the dataset and model.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
bash ./scripts/prepare_data_and_model.sh
|
||||
|
||||
3. Verify the preprocessed files. After the script completes, check for the
|
||||
following files in the mounted directories.
|
||||
|
||||
After preprocessing, you should see the following files in the ``/data/model`` directory:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
<hash>_tokenizer.model llama2-70b.nemo
|
||||
model_config.yaml model_weights
|
||||
|
||||
And the following files in ``/data/data``:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
train.npy validation.npy
|
||||
|
||||
4. Exit the container and return to your host shell.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
exit
|
||||
|
||||
Run the benchmark
|
||||
=================
|
||||
|
||||
With the dataset prepared, you can now configure and run the fine-tuning
|
||||
benchmark from your host machine.
|
||||
|
||||
1. Set the environment variables. These variables point to the directories you used
|
||||
for data, the model, and where the resulting logs should be stored.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export DATADIR=/data/mlperf_llama2
|
||||
export LOGDIR=/data/mlperf_llama2/results
|
||||
export CONT=rocm/7.0:rocm7.0_ubuntu22.04_llama2_70B_training_ml_perf_instinct_20250915
|
||||
|
||||
.. tip::
|
||||
|
||||
Ensure the log directory exists and is writable by the container user.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
mkdir -p $LOGDIR
|
||||
sudo chmod -R 777 $LOGDIR
|
||||
|
||||
.. _system-config:
|
||||
|
||||
2. Source the system-specific configuration file. The ``config_*.sh`` files
|
||||
contain optimized hyperparameters for different hardware configurations.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Use the appropriate config
|
||||
source config_MI355X_1x8x1.sh
|
||||
|
||||
3. To perform a single training run, use the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export NEXP=1
|
||||
bash run_with_docker.sh
|
||||
|
||||
Optionally, to perform 10 consecutive training runs:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export NEXP=10
|
||||
bash run_with_docker.sh
|
||||
|
||||
.. note::
|
||||
|
||||
To optimize performance, the ``run_with_docker.sh`` script automatically
|
||||
executes ``runtime_tunables.sh`` to apply system-level optimizations
|
||||
before starting the training job.
|
||||
|
||||
Upon run completion, the logs will be available under ``$LOGDIR``.
|
||||
113
docs/benchmark-docker/training-torchtitan-llama-3.rst
Normal file
113
docs/benchmark-docker/training-torchtitan-llama-3.rst
Normal file
@@ -0,0 +1,113 @@
|
||||
**********************************************
|
||||
Benchmark Llama 3 pre-training with torchtitan
|
||||
**********************************************
|
||||
|
||||
This page describes how to benchmark Llama 3 8B and 70B pre-training using
|
||||
torchtitan. The provided Docker image integrates
|
||||
ROCm 7.0 with torchtitan -- and is tailored for AMD Instinct MI355X and MI350X
|
||||
GPUs.
|
||||
|
||||
Follow these steps to pull the required image, spin up the container with the
|
||||
appropriate options, download the model, and run the throughput test.
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <https://hub.docker.com/r/rocm/7.0/tags>`__.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
Run the training benchmark
|
||||
==========================
|
||||
|
||||
1. Start the container using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
-w /workspace/torchtitan \
|
||||
--name training_benchmark \
|
||||
rocm/7.0:rocm7.0_pytorch_training_instinct_20250915
|
||||
|
||||
.. note::
|
||||
|
||||
This containerized environment includes all necessary dependencies and pre-tuned
|
||||
configurations for the supported models and precision types.
|
||||
|
||||
2. Download the Llama 3 tokenizer. Make sure to set ``HF_TOKEN`` using
|
||||
a valid Hugging Face access token with Llama model permissions.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
:sync: 8b
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HF_TOKEN=#{your huggingface token with Llama 3 access}
|
||||
python3 scripts/download_tokenizer.py \
|
||||
--repo_id meta-llama/Meta-Llama-3-8B \
|
||||
--tokenizer_path "original" \
|
||||
--hf_token=${HF_TOKEN}
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
:sync: 70b
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HF_TOKEN=#{your huggingface token with Llama 3 access}
|
||||
python3 scripts/download_tokenizer.py \
|
||||
--repo_id meta-llama/Meta-Llama-3-70B \
|
||||
--tokenizer_path "original" \
|
||||
--hf_token=${HF_TOKEN}
|
||||
|
||||
3. Run the training script with the following options for your desired precision.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: Llama 3 8B
|
||||
:sync: 8b
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_8b_fsdp_bf16.toml" ./run_train.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_8b_fsdp_fp8.toml" ./run_train.sh
|
||||
|
||||
.. tab-item:: Llama 3 70B
|
||||
:sync: 70b
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: BF16
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_70b_fsdp_bf16.toml" ./run_train.sh
|
||||
|
||||
.. tab-item:: FP8
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
CONFIG_FILE="./llama3_70b_fsdp_fp8.toml" ./run_train.sh
|
||||
@@ -1,122 +0,0 @@
|
||||
ROCm Version,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_ ,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
@@ -1,233 +0,0 @@
|
||||
.. meta::
|
||||
:description: ROCm compatibility matrix
|
||||
:keywords: GPU, architecture, hardware, compatibility, system, requirements, components, libraries
|
||||
|
||||
**************************************************************************************
|
||||
Compatibility matrix
|
||||
**************************************************************************************
|
||||
|
||||
Use this matrix to view the ROCm compatibility and system requirements across successive major and minor releases.
|
||||
|
||||
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
|
||||
|
||||
Accelerators and GPUs listed in the following table support compute workloads (no display
|
||||
information or graphics). If you’re using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
|
||||
workloads, see the `Use ROCm on Radeon GPU documentation
|
||||
<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
|
||||
compatibility and system requirements.
|
||||
|
||||
.. |br| raw:: html
|
||||
|
||||
<br/>
|
||||
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.4.1", "6.4.0", "6.3.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10
|
||||
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
|
||||
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
|
||||
,.. _architecture-support-compatibility-matrix:,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2
|
||||
CUB,2.5.0,2.5.0,2.3.2
|
||||
,,,
|
||||
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
|
||||
`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_ ,2.0.0,2.0.0,N/A
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60300
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25172,19.0.0.25133,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25172,19.0.0.25133,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25172,19.0.0.25133,18.0.0.24491
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42131
|
||||
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42131
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#kfd_support] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
|
||||
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
.. _OS-kernel-versions:
|
||||
|
||||
Operating systems, kernel and Glibc versions
|
||||
*********************************************
|
||||
|
||||
Use this lookup table to confirm which operating system and kernel versions are supported with ROCm.
|
||||
|
||||
.. csv-table::
|
||||
:header: "OS", "Version", "Kernel", "Glibc"
|
||||
:widths: 40, 20, 30, 20
|
||||
:stub-columns: 1
|
||||
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
|
||||
,,
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.5, 5.14+, 2.34
|
||||
,9.4, 5.14+, 2.34
|
||||
,9.3, 5.14+, 2.34
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
|
||||
,8.9, 4.18.0, 2.28
|
||||
,,
|
||||
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP6, "6.5.0+, 6.4.0", 2.38
|
||||
,15 SP5, 5.14.21, 2.31
|
||||
,,
|
||||
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
|
||||
,8, 5.15.0 (UEK), 2.28
|
||||
,,
|
||||
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
|
||||
,,
|
||||
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
|
||||
,,
|
||||
|
||||
.. note::
|
||||
|
||||
* See `Red Hat Enterprise Linux Release Dates <https://access.redhat.com/articles/3078>`_ to learn about the specific kernel versions supported on Red Hat Enterprise Linux (RHEL).
|
||||
* See `List of SUSE Linux Enterprise Server kernel <https://www.suse.com/support/kb/doc/?id=000019587>`_ to learn about the specific kernel version supported on SUSE Linux Enterprise Server (SLES).
|
||||
..
|
||||
Footnotes and ref anchors in below historical tables should be appended with "-past-60", to differentiate from the
|
||||
footnote references in the above, latest, compatibility matrix. It also allows to easily find & replace.
|
||||
An easy way to work is to download the historical.CSV file, and update open it in excel. Then when content is ready,
|
||||
delete the columns you don't need, to build the current compatibility matrix to use in above table. Find & replace all
|
||||
instances of "-past-60" to make it ready for above table.
|
||||
|
||||
|
||||
.. _past-rocm-compatibility-matrix:
|
||||
|
||||
Past versions of ROCm compatibility matrix
|
||||
***************************************************
|
||||
|
||||
Expand for full historical view of:
|
||||
|
||||
.. dropdown:: ROCm 6.0 - Present
|
||||
|
||||
You can `download the entire .csv <../downloads/compatibility-matrix-historical-6.0.csv>`_ for offline reference.
|
||||
|
||||
.. csv-table::
|
||||
:file: compatibility-matrix-historical-6.0.csv
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#kfd_support-past-60] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
|
||||
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
@@ -1,673 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: JAX compatibility
|
||||
:keywords: GPU, JAX compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
JAX compatibility
|
||||
*******************************************************************************
|
||||
|
||||
JAX provides a NumPy-like API, which combines automatic differentiation and the
|
||||
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
|
||||
learning at scale.
|
||||
|
||||
JAX uses composable transformations of Python and NumPy through just-in-time (JIT) compilation,
|
||||
automatic vectorization, and parallelization. To learn about JAX, including profiling and
|
||||
optimizations, see the official `JAX documentation
|
||||
<https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.
|
||||
|
||||
ROCm support for JAX is upstreamed and users can build the official source code with ROCm
|
||||
support:
|
||||
|
||||
- ROCm JAX release:
|
||||
|
||||
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>` with ROCm and JAX pre-installed.
|
||||
|
||||
- ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_
|
||||
|
||||
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
to get started.
|
||||
|
||||
- Official JAX release:
|
||||
|
||||
- Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_
|
||||
|
||||
- See the `AMD GPU (Linux) installation section
|
||||
<https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in the JAX
|
||||
documentation.
|
||||
|
||||
.. note::
|
||||
|
||||
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
quarterly alongside new ROCm releases. These images undergo full AMD testing.
|
||||
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
follow upstream JAX releases and use the latest available ROCm version.
|
||||
|
||||
.. _jax-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are validated for
|
||||
`ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click the |docker-icon|
|
||||
icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: JAX Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.12/images/sha256-4069398229078f3311128b6d276c6af377c7e97d3363d020b0bf7154fae619ca"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 24.04
|
||||
- `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.10/images/sha256-a137f901f91ce6c13b424c40a6cf535248d4d20fd36d5daf5eee0570190a4a11"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.14 <https://www.python.org/downloads/release/python-31014/>`_
|
||||
|
||||
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
|
||||
|
||||
.. list-table:: JAX community Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
Critical ROCm libraries for JAX
|
||||
================================================================================
|
||||
|
||||
The functionality of JAX with ROCm is determined by its underlying library
|
||||
dependencies. These critical ROCm components affect the capabilities,
|
||||
performance, and feature set available to developers. The versions described
|
||||
are available in ROCm :version:`rocm_version`.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
- Used in
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Matrix multiplication in ``jax.numpy.matmul``, ``jax.lax.dot`` and
|
||||
``jax.lax.dot_general``, operations like ``jax.numpy.dot``, which
|
||||
involve vector and matrix computations and batch matrix multiplications
|
||||
``jax.numpy.einsum`` with matrix-multiplication patterns algebra
|
||||
operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of hipBLAS, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- Matrix multiplication in ``jax.numpy.matmul`` or ``jax.lax.dot``, and
|
||||
the XLA (Accelerated Linear Algebra) use hipBLASLt for optimized matrix
|
||||
operations, mixed-precision support, and hardware-specific
|
||||
optimizations.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Reduction functions (``jax.numpy.sum``, ``jax.numpy.mean``,
|
||||
``jax.numpy.prod``, ``jax.numpy.max`` and ``jax.numpy.min``), prefix sum
|
||||
(``jax.numpy.cumsum``, ``jax.numpy.cumprod``) and sorting
|
||||
(``jax.numpy.sort``, ``jax.numpy.argsort``).
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
- Used in functions like ``jax.numpy.fft``.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
- The ``jax.random.uniform``, ``jax.random.normal``,
|
||||
``jax.random.randint`` and ``jax.random.split``.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
singular value decompositions (SVD).
|
||||
- Solving linear systems (``jax.numpy.linalg.solve``), matrix
|
||||
factorizations, SVD (``jax.numpy.linalg.svd``) and eigenvalue problems
|
||||
(``jax.numpy.linalg.eig``).
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
- Sparse matrix multiplication (``jax.numpy.matmul``), sparse
|
||||
matrix-vector and matrix-matrix products
|
||||
(``jax.experimental.sparse.dot``), sparse linear system solvers and
|
||||
sparse data handling.
|
||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
||||
- :version-ref:`hipSPARSELt rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
- Sparse matrix multiplication (``jax.numpy.matmul``), sparse
|
||||
matrix-vector and matrix-matrix products
|
||||
(``jax.experimental.sparse.dot``) and sparse linear system solvers.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Optimized for deep learning primitives such as convolutions, pooling,
|
||||
normalization, and activation functions.
|
||||
- Speeds up convolutional neural networks (CNNs), recurrent neural
|
||||
networks (RNNs), and other layers. Used in operations like
|
||||
``jax.nn.conv``, ``jax.nn.relu``, and ``jax.nn.batch_norm``.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimized for multi-GPU communication for operations like all-reduce,
|
||||
broadcast, and scatter.
|
||||
- Distribute computations across multiple GPU with ``pmap`` and
|
||||
``jax.distributed``. XLA automatically uses rccl when executing
|
||||
operations across multiple GPUs on AMD hardware.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
- Reduction operations like ``jax.numpy.sum``, ``jax.pmap`` for
|
||||
distributed training, which involves parallel reductions or
|
||||
operations like ``jax.numpy.cumsum`` can use rocThrust.
|
||||
|
||||
Supported and unsupported features
|
||||
===============================================================================
|
||||
|
||||
The following table maps GPU-accelerated JAX modules to their supported
|
||||
ROCm and JAX versions.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Description
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.numpy``
|
||||
- Implements the NumPy API, using the primitives in ``jax.lax``.
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy``
|
||||
- Provides GPU-accelerated and differentiable implementations of many
|
||||
functions from the SciPy library, leveraging JAX's transformations
|
||||
(e.g., ``grad``, ``jit``, ``vmap``).
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.lax``
|
||||
- A library of primitives operations that underpins libraries such as
|
||||
``jax.numpy.`` Transformation rules, such as Jacobian-vector product
|
||||
(JVP) and batching rules, are typically defined as transformations on
|
||||
``jax.lax`` primitives.
|
||||
- 0.1.57
|
||||
- 5.0.0
|
||||
* - ``jax.random``
|
||||
- Provides a number of routines for deterministic generation of sequences
|
||||
of pseudorandom numbers.
|
||||
- 0.1.58
|
||||
- 5.0.0
|
||||
* - ``jax.sharding``
|
||||
- Allows to define partitioning and distributing arrays across multiple
|
||||
devices.
|
||||
- 0.3.20
|
||||
- 5.1.0
|
||||
* - ``jax.dlpack``
|
||||
- For exchanging tensor data between JAX and other libraries that support the
|
||||
DLPack standard.
|
||||
- 0.1.57
|
||||
- 5.0.0
|
||||
* - ``jax.distributed``
|
||||
- Enables the scaling of computations across multiple devices on a single
|
||||
machine or across multiple machines.
|
||||
- 0.1.74
|
||||
- 5.0.0
|
||||
* - ``jax.dtypes``
|
||||
- Provides utilities for working with and managing data types in JAX
|
||||
arrays and computations.
|
||||
- 0.1.66
|
||||
- 5.0.0
|
||||
* - ``jax.image``
|
||||
- Contains image manipulation functions like resize, scale and translation.
|
||||
- 0.1.57
|
||||
- 5.0.0
|
||||
* - ``jax.nn``
|
||||
- Contains common functions for neural network libraries.
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.ops``
|
||||
- Computes the minimum, maximum, sum or product within segments of an
|
||||
array.
|
||||
- 0.1.57
|
||||
- 5.0.0
|
||||
* - ``jax.profiler``
|
||||
- Contains JAX’s tracing and time profiling features.
|
||||
- 0.1.57
|
||||
- 5.0.0
|
||||
* - ``jax.stages``
|
||||
- Contains interfaces to stages of the compiled execution process.
|
||||
- 0.3.4
|
||||
- 5.0.0
|
||||
* - ``jax.tree``
|
||||
- Provides utilities for working with tree-like container data structures.
|
||||
- 0.4.26
|
||||
- 5.6.0
|
||||
* - ``jax.tree_util``
|
||||
- Provides utilities for working with nested data structures, or
|
||||
``pytrees``.
|
||||
- 0.1.65
|
||||
- 5.0.0
|
||||
* - ``jax.typing``
|
||||
- Provides JAX-specific static type annotations.
|
||||
- 0.3.18
|
||||
- 5.1.0
|
||||
* - ``jax.extend``
|
||||
- Provides modules for access to JAX internal machinery module. The
|
||||
``jax.extend`` module defines a library view of some of JAX’s internal
|
||||
components.
|
||||
- 0.4.15
|
||||
- 5.5.0
|
||||
* - ``jax.example_libraries``
|
||||
- Serves as a collection of example code and libraries that demonstrate
|
||||
various capabilities of JAX.
|
||||
- 0.1.74
|
||||
- 5.0.0
|
||||
* - ``jax.experimental``
|
||||
- Namespace for experimental features and APIs that are in development or
|
||||
are not yet fully stable for production use.
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.lib``
|
||||
- Set of internal tools and types for bridging between JAX’s Python
|
||||
frontend and its XLA backend.
|
||||
- 0.4.6
|
||||
- 5.3.0
|
||||
* - ``jax_triton``
|
||||
- Library that integrates the Triton deep learning compiler with JAX.
|
||||
- jax_triton 0.2.0
|
||||
- 6.2.4
|
||||
|
||||
jax.scipy module
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
A SciPy-like API for scientific computing.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.scipy.cluster``
|
||||
- 0.3.11
|
||||
- 5.1.0
|
||||
* - ``jax.scipy.fft``
|
||||
- 0.1.71
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.integrate``
|
||||
- 0.4.15
|
||||
- 5.5.0
|
||||
* - ``jax.scipy.interpolate``
|
||||
- 0.1.76
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.linalg``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.ndimage``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.optimize``
|
||||
- 0.1.57
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.signal``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.spatial.transform``
|
||||
- 0.4.12
|
||||
- 5.4.0
|
||||
* - ``jax.scipy.sparse.linalg``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.special``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
|
||||
jax.scipy.stats module
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.scipy.stats.bernouli``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.beta``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.betabinom``
|
||||
- 0.1.61
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.binom``
|
||||
- 0.4.14
|
||||
- 5.4.0
|
||||
* - ``jax.scipy.stats.cauchy``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.chi2``
|
||||
- 0.1.61
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.dirichlet``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.expon``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.gamma``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.gennorm``
|
||||
- 0.3.15
|
||||
- 5.2.0
|
||||
* - ``jax.scipy.stats.geom``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.laplace``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.logistic``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.multinomial``
|
||||
- 0.3.18
|
||||
- 5.1.0
|
||||
* - ``jax.scipy.stats.multivariate_normal``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.nbinom``
|
||||
- 0.1.72
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.norm``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.pareto``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.poisson``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.t``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.truncnorm``
|
||||
- 0.4.0
|
||||
- 5.3.0
|
||||
* - ``jax.scipy.stats.uniform``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.scipy.stats.vonmises``
|
||||
- 0.4.2
|
||||
- 5.3.0
|
||||
* - ``jax.scipy.stats.wrapcauchy``
|
||||
- 0.4.20
|
||||
- 5.6.0
|
||||
|
||||
jax.extend module
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
Modules for JAX extensions.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.extend.ffi``
|
||||
- 0.4.30
|
||||
- 6.0.0
|
||||
* - ``jax.extend.linear_util``
|
||||
- 0.4.17
|
||||
- 5.6.0
|
||||
* - ``jax.extend.mlir``
|
||||
- 0.4.26
|
||||
- 5.6.0
|
||||
* - ``jax.extend.random``
|
||||
- 0.4.15
|
||||
- 5.5.0
|
||||
|
||||
jax.experimental module
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
Experimental modules and APIs.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.experimental.checkify``
|
||||
- 0.1.75
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.compilation_cache.compilation_cache``
|
||||
- 0.1.68
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.custom_partitioning``
|
||||
- 0.4.0
|
||||
- 5.3.0
|
||||
* - ``jax.experimental.jet``
|
||||
- 0.1.56
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.key_reuse``
|
||||
- 0.4.26
|
||||
- 5.6.0
|
||||
* - ``jax.experimental.mesh_utils``
|
||||
- 0.1.76
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.multihost_utils``
|
||||
- 0.3.2
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.pallas``
|
||||
- 0.4.15
|
||||
- 5.5.0
|
||||
* - ``jax.experimental.pjit``
|
||||
- 0.1.61
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.serialize_executable``
|
||||
- 0.4.0
|
||||
- 5.3.0
|
||||
* - ``jax.experimental.shard_map``
|
||||
- 0.4.3
|
||||
- 5.3.0
|
||||
* - ``jax.experimental.sparse``
|
||||
- 0.1.75
|
||||
- 5.0.0
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - API
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.experimental.enable_x64``
|
||||
- 0.1.60
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.disable_x64``
|
||||
- 0.1.60
|
||||
- 5.0.0
|
||||
|
||||
jax.experimental.pallas module
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Module for Pallas, a JAX extension for custom kernels.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.experimental.pallas.mosaic_gpu``
|
||||
- 0.4.31
|
||||
- 6.1.3
|
||||
* - ``jax.experimental.pallas.tpu``
|
||||
- 0.4.15
|
||||
- 5.5.0
|
||||
* - ``jax.experimental.pallas.triton``
|
||||
- 0.4.32
|
||||
- 6.1.3
|
||||
|
||||
jax.experimental.sparse module
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Experimental support for sparse matrix operations.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.experimental.sparse.linalg``
|
||||
- 0.3.15
|
||||
- 5.2.0
|
||||
* - ``jax.experimental.sparse.sparsify``
|
||||
- 0.3.25
|
||||
- ❌
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ``sparse`` data structure API
|
||||
- Since JAX
|
||||
- Since ROCm
|
||||
* - ``jax.experimental.sparse.BCOO``
|
||||
- 0.1.72
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.sparse.BCSR``
|
||||
- 0.3.20
|
||||
- 5.1.0
|
||||
* - ``jax.experimental.sparse.CSR``
|
||||
- 0.1.75
|
||||
- 5.0.0
|
||||
* - ``jax.experimental.sparse.NM``
|
||||
- 0.4.27
|
||||
- 5.6.0
|
||||
* - ``jax.experimental.sparse.COO``
|
||||
- 0.1.75
|
||||
- 5.0.0
|
||||
|
||||
Unsupported JAX features
|
||||
------------------------
|
||||
|
||||
The following are GPU-accelerated JAX features not currently supported by
|
||||
ROCm.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since JAX
|
||||
* - Mixed Precision with TF32
|
||||
- Mixed precision with TF32 is used for matrix multiplications,
|
||||
convolutions, and other linear algebra operations, particularly in
|
||||
deep learning workloads like CNNs and transformers.
|
||||
- 0.2.25
|
||||
* - RNN support
|
||||
- Currently only LSTM with double bias is supported with float32 input
|
||||
and weight.
|
||||
- 0.3.25
|
||||
* - XLA int4 support
|
||||
- 4-bit integer (int4) precision in the XLA compiler.
|
||||
- 0.4.0
|
||||
* - ``jax.experimental.sparsify``
|
||||
- Converts a dense matrix to a sparse matrix representation.
|
||||
- Experimental
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
|
||||
blog explores the implementation and training of a Generative Pre-trained
|
||||
Transformer (GPT) model in JAX, inspired by Andrej Karpathy’s PyTorch-based
|
||||
nanoGPT. By comparing how essential GPT components—such as self-attention
|
||||
mechanisms and optimizers—are realized in PyTorch and JAX, also highlight
|
||||
JAX’s unique features.
|
||||
|
||||
* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
|
||||
ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
|
||||
blog post provides a comprehensive guide on enhancing the training efficiency
|
||||
of GPT models by implementing mixed precision techniques in JAX, specifically
|
||||
tailored for AMD GPUs utilizing the ROCm platform.
|
||||
|
||||
* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
|
||||
blog demonstrates how to develop a custom fused dropout-activation kernel for
|
||||
matrices using Triton, integrate it with JAX, and benchmark its performance
|
||||
using ROCm.
|
||||
|
||||
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
|
||||
outlines the process of fine-tuning a Bidirectional Encoder Representations
|
||||
from Transformers (BERT)-based large language model (LLM) using JAX for a text
|
||||
classification task. The blog post discuss techniques for parallelizing the
|
||||
fine-tuning across multiple AMD GPUs and assess the model's performance on a
|
||||
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
|
||||
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
|
||||
used on a multi-GPU setup.
|
||||
|
||||
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. The page is aimed at helping users achieve optimal
|
||||
performance for deep learning and other high-performance computing tasks on
|
||||
the MI300X GPU.
|
||||
|
||||
For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
|
||||
@@ -1,953 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: PyTorch compatibility
|
||||
:keywords: GPU, PyTorch compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
PyTorch compatibility
|
||||
********************************************************************************
|
||||
|
||||
`PyTorch <https://pytorch.org/>`_ is an open-source tensor library designed for
|
||||
deep learning. PyTorch on ROCm provides mixed-precision and large-scale training
|
||||
using `MIOpen <https://github.com/ROCm/MIOpen>`_ and
|
||||
`RCCL <https://github.com/ROCm/rccl>`_ libraries.
|
||||
|
||||
ROCm support for PyTorch is upstreamed into the official PyTorch repository. Due
|
||||
to independent compatibility considerations, this results in two distinct
|
||||
release cycles for PyTorch on ROCm:
|
||||
|
||||
- ROCm PyTorch release:
|
||||
|
||||
- Provides the latest version of ROCm but might not necessarily support the
|
||||
latest stable PyTorch version.
|
||||
|
||||
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
|
||||
preinstalled.
|
||||
|
||||
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`_
|
||||
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
to get started.
|
||||
|
||||
- Official PyTorch release:
|
||||
|
||||
- Provides the latest stable version of PyTorch but might not necessarily
|
||||
support the latest ROCm version.
|
||||
|
||||
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`_
|
||||
|
||||
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`_
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_
|
||||
to get started.
|
||||
|
||||
PyTorch includes tooling that generates HIP source code from the CUDA backend.
|
||||
This approach allows PyTorch to support ROCm without requiring manual code
|
||||
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
|
||||
|
||||
ROCm development is aligned with the stable release of PyTorch, while upstream
|
||||
PyTorch testing uses the stable release of ROCm to maintain consistency.
|
||||
|
||||
.. _pytorch-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
|
||||
guides how to leverage the ROCm platform for training AI models. It covers the
|
||||
steps, tools, and best practices for optimizing training workflows on AMD GPUs
|
||||
using PyTorch features.
|
||||
|
||||
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates how to use the ROCm platform for the fine-tuning
|
||||
and inference of machine learning models, particularly large language models
|
||||
(LLMs), on systems with a single GPU. This topic provides a detailed guide for
|
||||
setting up, optimizing, and executing fine-tuning and inference workflows in
|
||||
such environments.
|
||||
|
||||
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates the fine-tuning and inference of machine learning
|
||||
models on systems with multiple GPUs.
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. This guide helps users achieve optimal performance for
|
||||
deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the
|
||||
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
|
||||
GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
|
||||
|
||||
.. _pytorch-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: PyTorch Docker image components
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- Apex
|
||||
- torchvision
|
||||
- TensorBoard
|
||||
- MAGMA
|
||||
- UCX
|
||||
- OMPI
|
||||
- OFED
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-ab1d350b818b90123cfda31363019d11c0d41a8f12a19e3cb2cb40cf0261137d"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-130536fdfceb374626a7bcb8d00b9d796ddfc3115677d51229e5b852d96b5ef4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-20a2e24b4738dc1f1a44a04f23827918b56c99f7e697e6fccb90e9c4fae8ca9b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-f09cb8ca39cc39222fb554060711f5c19130f7b4047aaf41fad4ba3ec470ca03"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- 22.04
|
||||
- `3.11.9 <https://www.python.org/downloads/release/python-3119/>`_
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-a91c100d1fe608dae3eb7f60a751630363d4027ac3d077d428e92945204c338e"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-66a89ce6485bb887af74bb9bd76bb613ab9834a6b1374649ea7ae379883454a4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-c716cf167e6e49893f11de03606ed37044153aca089e74ca615065c06877f86b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-0434cbc9b07b2c26e39480d7447f676f9057a1054dcff00e0050c25a6eddbd3c"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
|
||||
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-688b1c0073092615fb98778d78b16191e506097ee116a2d3d2628b264d5d367b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
|
||||
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
Key ROCm libraries for PyTorch
|
||||
================================================================================
|
||||
|
||||
PyTorch functionality on ROCm is determined by its underlying library
|
||||
dependencies. These ROCm components affect the capabilities, performance, and
|
||||
feature set available to developers.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
- Used in
|
||||
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
|
||||
- :version-ref:`"Composable Kernel" rocm_version`
|
||||
- Enables faster execution of core operations like matrix multiplication
|
||||
(GEMM), convolutions and transformations.
|
||||
- Speeds up ``torch.permute``, ``torch.view``, ``torch.matmul``,
|
||||
``torch.mm``, ``torch.bmm``, ``torch.nn.Conv2d``, ``torch.nn.Conv3d``
|
||||
and ``torch.nn.MultiheadAttention``.
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Supports operations such as matrix multiplication, matrix-vector
|
||||
products, and tensor contractions. Utilized in both dense and batched
|
||||
linear algebra operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
|
||||
matrix multiplications used in convolutional and linear layers.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Supports operations such as ``torch.sum``, ``torch.cumsum``,
|
||||
``torch.sort`` irregular shapes often involve scanning, sorting, and
|
||||
filtering, which hipCUB handles efficiently.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
- Used in functions like the ``torch.fft`` module.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
- The ``torch.rand``, ``torch.randn``, and stochastic layers like
|
||||
``torch.nn.Dropout`` rely on hipRAND.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
singular value decompositions (SVD).
|
||||
- Supports functions like ``torch.linalg.solve``,
|
||||
``torch.linalg.eig``, and ``torch.linalg.svd``.
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
- Sparse tensor operations ``torch.sparse``.
|
||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
||||
- :version-ref:`hipSPARSELt rocm_version`
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
- Sparse tensor operations ``torch.sparse``.
|
||||
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
|
||||
- :version-ref:`hipTensor rocm_version`
|
||||
- Optimizes for high-performance tensor operations, such as contractions.
|
||||
- Accelerates tensor algebra, especially in deep learning and scientific
|
||||
computing.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Optimizes deep learning primitives such as convolutions, pooling,
|
||||
normalization, and activation functions.
|
||||
- Speeds up convolutional neural networks (CNNs), recurrent neural
|
||||
networks (RNNs), and other layers. Used in operations like
|
||||
``torch.nn.Conv2d``, ``torch.nn.ReLU``, and ``torch.nn.LSTM``.
|
||||
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
|
||||
- :version-ref:`MIGraphX rocm_version`
|
||||
- Adds graph-level optimizations, ONNX models and mixed precision support
|
||||
and enable Ahead-of-Time (AOT) Compilation.
|
||||
- Speeds up inference models and executes ONNX models for
|
||||
compatibility with other frameworks.
|
||||
``torch.nn.Conv2d``, ``torch.nn.ReLU``, and ``torch.nn.LSTM``.
|
||||
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
|
||||
- :version-ref:`MIVisionX rocm_version`
|
||||
- Optimizes acceleration for computer vision and AI workloads like
|
||||
preprocessing, augmentation, and inferencing.
|
||||
- Faster data preprocessing and augmentation pipelines for datasets like
|
||||
ImageNet or COCO and easy to integrate into PyTorch's ``torch.utils.data``
|
||||
and ``torchvision`` workflows.
|
||||
* - `rocAL <https://github.com/ROCm/rocAL>`_
|
||||
- :version-ref:`rocAL rocm_version`
|
||||
- Accelerates the data pipeline by offloading intensive preprocessing and
|
||||
augmentation tasks. rocAL is part of MIVisionX.
|
||||
- Easy to integrate into PyTorch's ``torch.utils.data`` and
|
||||
``torchvision`` data load workloads.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||
Broadcast.
|
||||
- Distributed data parallel training (``torch.nn.parallel.DistributedDataParallel``).
|
||||
Handles communication in multi-GPU setups.
|
||||
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
|
||||
- :version-ref:`rocDecode rocm_version`
|
||||
- Provides hardware-accelerated data decoding capabilities, particularly
|
||||
for image, video, and other dataset formats.
|
||||
- Can be integrated in ``torch.utils.data``, ``torchvision.transforms``
|
||||
and ``torch.distributed``.
|
||||
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
|
||||
- :version-ref:`rocJPEG rocm_version`
|
||||
- Provides hardware-accelerated JPEG image decoding and encoding.
|
||||
- GPU accelerated ``torchvision.io.decode_jpeg`` and
|
||||
``torchvision.io.encode_jpeg`` and can be integrated in
|
||||
``torch.utils.data`` and ``torchvision``.
|
||||
* - `RPP <https://github.com/ROCm/RPP>`_
|
||||
- :version-ref:`RPP rocm_version`
|
||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||
- Easy to integrate into PyTorch's ``torch.utils.data`` and
|
||||
``torchvision`` data load workloads to speed up data processing.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
- Utilized in backend operations for tensor computations requiring
|
||||
parallel processing.
|
||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
|
||||
- :version-ref:`rocWMMA rocm_version`
|
||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||
multiplication (GEMM) and accumulation operations with mixed precision
|
||||
support.
|
||||
- Linear layers (``torch.nn.Linear``), convolutional layers
|
||||
(``torch.nn.Conv2d``), attention layers, general tensor operations that
|
||||
involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
|
||||
more.
|
||||
|
||||
Supported features
|
||||
================================================================================
|
||||
|
||||
This section maps GPU-accelerated PyTorch features to their supported ROCm and
|
||||
PyTorch versions.
|
||||
|
||||
torch
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
`torch <https://pytorch.org/docs/stable/index.html>`_ is the central module of
|
||||
PyTorch, providing data structures for multi-dimensional tensors and
|
||||
implementing mathematical operations on them. It also includes utilities for
|
||||
efficient serialization of tensors and arbitrary data types and other tools.
|
||||
|
||||
Tensor data types
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The tensor data type is specified using the ``dtype`` attribute or argument.
|
||||
PyTorch supports many data types for different use cases.
|
||||
|
||||
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_
|
||||
single data types:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``torch.float8_e4m3fn``
|
||||
- 8-bit floating point, e4m3
|
||||
- 2.3
|
||||
- 5.5
|
||||
* - ``torch.float8_e5m2``
|
||||
- 8-bit floating point, e5m2
|
||||
- 2.3
|
||||
- 5.5
|
||||
* - ``torch.float16`` or ``torch.half``
|
||||
- 16-bit floating point
|
||||
- 0.1.6
|
||||
- 2.0
|
||||
* - ``torch.bfloat16``
|
||||
- 16-bit floating point
|
||||
- 1.6
|
||||
- 2.6
|
||||
* - ``torch.float32`` or ``torch.float``
|
||||
- 32-bit floating point
|
||||
- 0.1.12_2
|
||||
- 2.0
|
||||
* - ``torch.float64`` or ``torch.double``
|
||||
- 64-bit floating point
|
||||
- 0.1.12_2
|
||||
- 2.0
|
||||
* - ``torch.complex32`` or ``torch.chalf``
|
||||
- PyTorch provides native support for 32-bit complex numbers
|
||||
- 1.6
|
||||
- 2.0
|
||||
* - ``torch.complex64`` or ``torch.cfloat``
|
||||
- PyTorch provides native support for 64-bit complex numbers
|
||||
- 1.6
|
||||
- 2.0
|
||||
* - ``torch.complex128`` or ``torch.cdouble``
|
||||
- PyTorch provides native support for 128-bit complex numbers
|
||||
- 1.6
|
||||
- 2.0
|
||||
* - ``torch.uint8``
|
||||
- 8-bit integer (unsigned)
|
||||
- 0.1.12_2
|
||||
- 2.0
|
||||
* - ``torch.uint16``
|
||||
- 16-bit integer (unsigned)
|
||||
- 2.3
|
||||
- Not natively supported
|
||||
* - ``torch.uint32``
|
||||
- 32-bit integer (unsigned)
|
||||
- 2.3
|
||||
- Not natively supported
|
||||
* - ``torch.uint64``
|
||||
- 32-bit integer (unsigned)
|
||||
- 2.3
|
||||
- Not natively supported
|
||||
* - ``torch.int8``
|
||||
- 8-bit integer (signed)
|
||||
- 1.12
|
||||
- 5.0
|
||||
* - ``torch.int16`` or ``torch.short``
|
||||
- 16-bit integer (signed)
|
||||
- 0.1.12_2
|
||||
- 2.0
|
||||
* - ``torch.int32`` or ``torch.int``
|
||||
- 32-bit integer (signed)
|
||||
- 0.1.12_2
|
||||
- 2.0
|
||||
* - ``torch.int64`` or ``torch.long``
|
||||
- 64-bit integer (signed)
|
||||
- 0.1.12_2
|
||||
- 2.0
|
||||
* - ``torch.bool``
|
||||
- Boolean
|
||||
- 1.2
|
||||
- 2.0
|
||||
* - ``torch.quint8``
|
||||
- Quantized 8-bit integer (unsigned)
|
||||
- 1.8
|
||||
- 5.0
|
||||
* - ``torch.qint8``
|
||||
- Quantized 8-bit integer (signed)
|
||||
- 1.8
|
||||
- 5.0
|
||||
* - ``torch.qint32``
|
||||
- Quantized 32-bit integer (signed)
|
||||
- 1.8
|
||||
- 5.0
|
||||
* - ``torch.quint4x2``
|
||||
- Quantized 4-bit integer (unsigned)
|
||||
- 1.8
|
||||
- 5.0
|
||||
|
||||
.. note::
|
||||
|
||||
Unsigned types except ``uint8`` have limited support in eager mode. They
|
||||
primarily exist to assist usage with ``torch.compile``.
|
||||
|
||||
See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
|
||||
native hardware support of data types.
|
||||
|
||||
torch.cuda
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
``torch.cuda`` in PyTorch is a module that provides utilities and functions for
|
||||
managing and utilizing AMD and NVIDIA GPUs. It enables GPU-accelerated
|
||||
computations, memory management, and efficient execution of tensor operations,
|
||||
leveraging ROCm and CUDA as the underlying frameworks.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - Device management
|
||||
- Utilities for managing and interacting with GPUs.
|
||||
- 0.4.0
|
||||
- 3.8
|
||||
* - Tensor operations on GPU
|
||||
- Performs tensor operations such as addition and matrix multiplications on
|
||||
the GPU.
|
||||
- 0.4.0
|
||||
- 3.8
|
||||
* - Streams and events
|
||||
- Streams allow overlapping computation and communication for optimized
|
||||
performance. Events enable synchronization.
|
||||
- 1.6.0
|
||||
- 3.8
|
||||
* - Memory management
|
||||
- Functions to manage and inspect memory usage like
|
||||
``torch.cuda.memory_allocated()``, ``torch.cuda.max_memory_allocated()``,
|
||||
``torch.cuda.memory_reserved()`` and ``torch.cuda.empty_cache()``.
|
||||
- 0.3.0
|
||||
- 1.9.2
|
||||
* - Running process lists of memory management
|
||||
- Returns a human-readable printout of the running processes and their GPU
|
||||
memory use for a given device with functions like
|
||||
``torch.cuda.memory_stats()`` and ``torch.cuda.memory_summary()``.
|
||||
- 1.8.0
|
||||
- 4.0
|
||||
* - Communication collectives
|
||||
- Set of APIs that enable efficient communication between multiple GPUs,
|
||||
allowing for distributed computing and data parallelism.
|
||||
- 1.9.0
|
||||
- 5.0
|
||||
* - ``torch.cuda.CUDAGraph``
|
||||
- Graphs capture sequences of GPU operations to minimize kernel launch
|
||||
overhead and improve performance.
|
||||
- 1.10.0
|
||||
- 5.3
|
||||
* - TunableOp
|
||||
- A mechanism that allows certain operations to be more flexible and
|
||||
optimized for performance. It enables automatic tuning of kernel
|
||||
configurations and other settings to achieve the best possible
|
||||
performance based on the specific hardware (GPU) and workload.
|
||||
- 2.0
|
||||
- 5.4
|
||||
* - NVIDIA Tools Extension (NVTX)
|
||||
- Integration with NVTX for profiling and debugging GPU performance using
|
||||
NVIDIA's Nsight tools.
|
||||
- 1.8.0
|
||||
- ❌
|
||||
* - Lazy loading NVRTC
|
||||
- Delays JIT compilation with NVRTC until the code is explicitly needed.
|
||||
- 1.13.0
|
||||
- ❌
|
||||
* - Jiterator (beta)
|
||||
- Jiterator allows asynchronous data streaming into computation streams
|
||||
during training loops.
|
||||
- 1.13.0
|
||||
- 5.2
|
||||
|
||||
.. Need to validate and extend.
|
||||
|
||||
torch.backends.cuda
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
``torch.backends.cuda`` is a PyTorch module that provides configuration options
|
||||
and flags to control the behavior of ROCm or CUDA operations. It is part of the
|
||||
PyTorch backend configuration system, which allows users to fine-tune how
|
||||
PyTorch interacts with the ROCm or CUDA environment.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``cufft_plan_cache``
|
||||
- Manages caching of GPU FFT plans to optimize repeated FFT computations.
|
||||
- 1.7.0
|
||||
- 5.0
|
||||
* - ``matmul.allow_tf32``
|
||||
- Enables or disables the use of TensorFloat-32 (TF32) precision for
|
||||
faster matrix multiplications on GPUs with Tensor Cores.
|
||||
- 1.10.0
|
||||
- ❌
|
||||
* - ``matmul.allow_fp16_reduced_precision_reduction``
|
||||
- Reduced precision reductions (e.g., with fp16 accumulation type) are
|
||||
allowed with fp16 GEMMs.
|
||||
- 2.0
|
||||
- ❌
|
||||
* - ``matmul.allow_bf16_reduced_precision_reduction``
|
||||
- Reduced precision reductions are allowed with bf16 GEMMs.
|
||||
- 2.0
|
||||
- ❌
|
||||
* - ``enable_cudnn_sdp``
|
||||
- Globally enables cuDNN SDPA's kernels within SDPA.
|
||||
- 2.0
|
||||
- ❌
|
||||
* - ``enable_flash_sdp``
|
||||
- Globally enables or disables FlashAttention for SDPA.
|
||||
- 2.1
|
||||
- ❌
|
||||
* - ``enable_mem_efficient_sdp``
|
||||
- Globally enables or disables Memory-Efficient Attention for SDPA.
|
||||
- 2.1
|
||||
- ❌
|
||||
* - ``enable_math_sdp``
|
||||
- Globally enables or disables the PyTorch C++ implementation within SDPA.
|
||||
- 2.1
|
||||
- ❌
|
||||
|
||||
.. Need to validate and extend.
|
||||
|
||||
torch.backends.cudnn
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Supported ``torch`` options include:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Option
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``allow_tf32``
|
||||
- TensorFloat-32 tensor cores may be used in cuDNN convolutions on NVIDIA
|
||||
Ampere or newer GPUs.
|
||||
- 1.12.0
|
||||
- ❌
|
||||
* - ``deterministic``
|
||||
- A bool that, if True, causes cuDNN to only use deterministic
|
||||
convolution algorithms.
|
||||
- 1.12.0
|
||||
- 6.0
|
||||
|
||||
Automatic mixed precision: torch.amp
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
PyTorch automates the process of using both 16-bit (half-precision, float16) and
|
||||
32-bit (single-precision, float32) floating-point types in model training and
|
||||
inference.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - Autocasting
|
||||
- Autocast instances serve as context managers or decorators that allow
|
||||
regions of your script to run in mixed precision.
|
||||
- 1.9
|
||||
- 2.5
|
||||
* - Gradient scaling
|
||||
- To prevent underflow, “gradient scaling” multiplies the network’s
|
||||
loss by a scale factor and invokes a backward pass on the scaled
|
||||
loss. The same factor then scales gradients flowing backward through
|
||||
the network. In other words, gradient values have a larger magnitude so
|
||||
that they don’t flush to zero.
|
||||
- 1.9
|
||||
- 2.5
|
||||
* - CUDA op-specific behavior
|
||||
- These ops always go through autocasting whether they are invoked as part
|
||||
of a ``torch.nn.Module``, as a function, or as a ``torch.Tensor`` method. If
|
||||
functions are exposed in multiple namespaces, they go through
|
||||
autocasting regardless of the namespace.
|
||||
- 1.9
|
||||
- 2.5
|
||||
|
||||
Distributed library features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
PyTorch distributed library includes a collective of parallelism modules, a
|
||||
communications layer, and infrastructure for launching and debugging large
|
||||
training jobs. See :ref:`rocm-for-ai-pytorch-distributed` for more information.
|
||||
|
||||
The Distributed Library feature in PyTorch provides tools and APIs for building
|
||||
and running distributed machine learning workflows. It allows training models
|
||||
across multiple processes, GPUs, or nodes in a cluster, enabling efficient use
|
||||
of computational resources and scalability for large-scale tasks.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - TensorPipe
|
||||
- A point-to-point communication library integrated into
|
||||
PyTorch for distributed training. It handles tensor data transfers
|
||||
efficiently between different processes or devices, including those on
|
||||
separate machines.
|
||||
- 1.8
|
||||
- 5.4
|
||||
* - Gloo
|
||||
- Designed for multi-machine and multi-GPU setups, enabling
|
||||
efficient communication and synchronization between processes. Gloo is
|
||||
one of the default backends for PyTorch's Distributed Data Parallel
|
||||
(DDP) and RPC frameworks, alongside other backends like NCCL and MPI.
|
||||
- 1.0
|
||||
- 2.0
|
||||
|
||||
torch.compiler
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``torch.compiler`` (AOT Autograd)
|
||||
- Autograd captures not only the user-level code, but also backpropagation,
|
||||
which results in capturing the backwards pass “ahead-of-time”. This
|
||||
enables acceleration of both forwards and backwards pass using
|
||||
``TorchInductor``.
|
||||
- 2.0
|
||||
- 5.3
|
||||
* - ``torch.compiler`` (TorchInductor)
|
||||
- The default ``torch.compile`` deep learning compiler that generates fast
|
||||
code for multiple accelerators and backends. You need to use a backend
|
||||
compiler to make speedups through ``torch.compile`` possible. For AMD,
|
||||
NVIDIA, and Intel GPUs, it leverages OpenAI Triton as the key building block.
|
||||
- 2.0
|
||||
- 5.3
|
||||
|
||||
torchaudio
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchaudio <https://pytorch.org/audio/stable/index.html>`_ library provides
|
||||
utilities for processing audio data in PyTorch, such as audio loading,
|
||||
transformations, and feature extraction.
|
||||
|
||||
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to
|
||||
explicitly move audio data (waveform tensor) to GPU using ``.to('cuda')``.
|
||||
|
||||
The following ``torchaudio`` features are GPU-accelerated.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of torchaudio version
|
||||
- As of ROCm
|
||||
* - ``torchaudio.transforms.Spectrogram``
|
||||
- Generate a spectrogram of an input waveform using STFT.
|
||||
- 0.6.0
|
||||
- 4.5
|
||||
* - ``torchaudio.transforms.MelSpectrogram``
|
||||
- Generates the mel-scale spectrogram of raw audio signals.
|
||||
- 0.9.0
|
||||
- 4.5
|
||||
* - ``torchaudio.transforms.MFCC``
|
||||
- Extract of MFCC features.
|
||||
- 0.9.0
|
||||
- 4.5
|
||||
* - ``torchaudio.transforms.Resample``
|
||||
- Resamples a signal from one frequency to another.
|
||||
- 0.9.0
|
||||
- 4.5
|
||||
|
||||
torchvision
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchvision <https://pytorch.org/vision/stable/index.html>`_ library
|
||||
provides datasets, model architectures, and common image transformations for
|
||||
computer vision.
|
||||
|
||||
The following ``torchvision`` features are GPU-accelerated.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of torchvision version
|
||||
- As of ROCm
|
||||
* - ``torchvision.transforms.functional``
|
||||
- Provides GPU-compatible transformations for image preprocessing like
|
||||
resize, normalize, rotate and crop.
|
||||
- 0.2.0
|
||||
- 4.0
|
||||
* - ``torchvision.ops``
|
||||
- GPU-accelerated operations for object detection and segmentation tasks.
|
||||
``torchvision.ops.roi_align``, ``torchvision.ops.nms`` and
|
||||
``box_convert``.
|
||||
- 0.6.0
|
||||
- 3.3
|
||||
* - ``torchvision.models`` with ``.to('cuda')``
|
||||
- ``torchvision`` provides several pre-trained models (ResNet, Faster
|
||||
R-CNN, Mask R-CNN, ...) that can run on CUDA for faster inference and
|
||||
training.
|
||||
- 0.1.6
|
||||
- 2.x
|
||||
* - ``torchvision.io``
|
||||
- Enables video decoding and frame extraction using GPU acceleration with NVIDIA’s
|
||||
NVDEC and nvJPEG (rocJPEG) on CUDA-enabled GPUs.
|
||||
- 0.4.0
|
||||
- 6.3
|
||||
|
||||
torchtext
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchtext <https://pytorch.org/text/stable/index.html>`_ library provides
|
||||
utilities for processing and working with text data in PyTorch, including
|
||||
tokenization, vocabulary management, and text embeddings. torchtext supports
|
||||
preprocessing pipelines and integration with PyTorch models, simplifying the
|
||||
implementation of natural language processing (NLP) tasks.
|
||||
|
||||
To leverage GPU acceleration in torchtext, you need to move tensors
|
||||
explicitly to the GPU using ``.to('cuda')``.
|
||||
|
||||
* torchtext does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
torchtune
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchtune <https://pytorch.org/torchtune/stable/index.html>`_ library for
|
||||
authoring, fine-tuning and experimenting with LLMs.
|
||||
|
||||
* Usage: Enabling developers to fine-tune ROCm PyTorch solutions.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
torchserve
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchserve <https://pytorch.org/serve/>`_ is a PyTorch domain library
|
||||
for common sparsity and parallelism primitives needed for large-scale recommender
|
||||
systems.
|
||||
|
||||
* torchtext does not implement its own kernels. ROCm support is enabled by
|
||||
linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
torchrec
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchrec <https://pytorch.org/torchrec/>`_ is a PyTorch domain library for
|
||||
common sparsity and parallelism primitives needed for large-scale recommender
|
||||
systems.
|
||||
|
||||
* torchrec does not implement its own kernels. ROCm support is enabled by
|
||||
linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
Unsupported PyTorch features
|
||||
================================================================================
|
||||
|
||||
The following GPU-accelerated PyTorch features are not supported by ROCm for
|
||||
the listed supported PyTorch versions.
|
||||
|
||||
.. list-table::
|
||||
:widths: 30, 60, 10
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- As of PyTorch
|
||||
* - APEX batch norm
|
||||
- Use APEX batch norm instead of PyTorch batch norm.
|
||||
- 1.6.0
|
||||
* - ``torch.backends.cuda`` / ``matmul.allow_tf32``
|
||||
- A bool that controls whether TensorFloat-32 tensor cores may be used in
|
||||
matrix multiplications.
|
||||
- 1.7
|
||||
* - ``torch.cuda`` / NVIDIA Tools Extension (NVTX)
|
||||
- Integration with NVTX for profiling and debugging GPU performance using
|
||||
NVIDIA's Nsight tools.
|
||||
- 1.7.0
|
||||
* - ``torch.cuda`` / Lazy loading NVRTC
|
||||
- Delays JIT compilation with NVRTC until the code is explicitly needed.
|
||||
- 1.8.0
|
||||
* - ``torch-tensorrt``
|
||||
- Integrate TensorRT library for optimizing and deploying PyTorch models.
|
||||
ROCm does not have equialent library for TensorRT.
|
||||
- 1.9.0
|
||||
* - ``torch.backends`` / ``cudnn.allow_tf32``
|
||||
- TensorFloat-32 tensor cores may be used in cuDNN convolutions.
|
||||
- 1.10.0
|
||||
* - ``torch.backends.cuda`` / ``matmul.allow_fp16_reduced_precision_reduction``
|
||||
- Reduced precision reductions with fp16 accumulation type are
|
||||
allowed with fp16 GEMMs.
|
||||
- 2.0
|
||||
* - ``torch.backends.cuda`` / ``matmul.allow_bf16_reduced_precision_reduction``
|
||||
- Reduced precision reductions are allowed with bf16 GEMMs.
|
||||
- 2.0
|
||||
* - ``torch.nn.functional`` / ``scaled_dot_product_attention``
|
||||
- Flash attention backend for SDPA to accelerate attention computation in
|
||||
transformer-based models.
|
||||
- 2.0
|
||||
* - ``torch.backends.cuda`` / ``enable_cudnn_sdp``
|
||||
- Globally enables cuDNN SDPA's kernels within SDPA.
|
||||
- 2.0
|
||||
* - ``torch.backends.cuda`` / ``enable_flash_sdp``
|
||||
- Globally enables or disables FlashAttention for SDPA.
|
||||
- 2.1
|
||||
* - ``torch.backends.cuda`` / ``enable_mem_efficient_sdp``
|
||||
- Globally enables or disables Memory-Efficient Attention for SDPA.
|
||||
- 2.1
|
||||
* - ``torch.backends.cuda`` / ``enable_math_sdp``
|
||||
- Globally enables or disables the PyTorch C++ implementation within SDPA.
|
||||
- 2.1
|
||||
* - Dynamic parallelism
|
||||
- PyTorch itself does not directly expose dynamic parallelism as a core
|
||||
feature. Dynamic parallelism allow GPU threads to launch additional
|
||||
threads which can be reached using custom operations via the
|
||||
``torch.utils.cpp_extension`` module.
|
||||
- Not a core feature
|
||||
* - Unified memory support in PyTorch
|
||||
- Unified Memory is not directly exposed in PyTorch's core API, it can be
|
||||
utilized effectively through custom CUDA extensions or advanced
|
||||
workflows.
|
||||
- Not a core feature
|
||||
@@ -1,531 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: TensorFlow compatibility
|
||||
:keywords: GPU, TensorFlow compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
TensorFlow compatibility
|
||||
*******************************************************************************
|
||||
|
||||
`TensorFlow <https://www.tensorflow.org/>`_ is an open-source library for
|
||||
solving machine learning, deep learning, and AI problems. It can solve many
|
||||
problems across different sectors and industries but primarily focuses on
|
||||
neural network training and inference. It is one of the most popular and
|
||||
in-demand frameworks and is very active in open-source contribution and
|
||||
development.
|
||||
|
||||
The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`_
|
||||
includes full ROCm support. AMD maintains a TensorFlow `ROCm repository
|
||||
<http://github.com/rocm/tensorflow-upstream>`_ in order to quickly add bug
|
||||
fixes, updates, and support for the latest ROCM versions.
|
||||
|
||||
- ROCm TensorFlow release:
|
||||
|
||||
- Offers :ref:`Docker images <tensorflow-docker-compat>` with
|
||||
ROCm and TensorFlow pre-installed.
|
||||
|
||||
- ROCm TensorFlow repository: `<https://github.com/ROCm/tensorflow-upstream>`_
|
||||
|
||||
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
|
||||
to get started.
|
||||
|
||||
- Official TensorFlow release:
|
||||
|
||||
- Official TensorFlow repository: `<https://github.com/tensorflow/tensorflow>`_
|
||||
|
||||
- See the `TensorFlow API versions <https://www.tensorflow.org/versions>`_ list.
|
||||
|
||||
.. note::
|
||||
|
||||
The official TensorFlow documentation does not cover ROCm support. Use the
|
||||
ROCm documentation for installation instructions for Tensorflow on ROCm.
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/tensorflow-install`.
|
||||
|
||||
.. _tensorflow-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
===============================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `TensorFlow images
|
||||
<https://hub.docker.com/r/rocm/tensorflow>`_ with ROCm backends on
|
||||
Docker Hub. The following Docker image tags and associated inventories are
|
||||
validated for `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click
|
||||
the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: TensorFlow Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- TensorFlow
|
||||
- Ubuntu
|
||||
- Dev
|
||||
- Python
|
||||
- TensorBoard
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-dev/images/sha256-fa9cf5fa6c6079a7118727531ccd0056c6e3224a42c3d6e78a49e7781daafff4"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-runtime/images/sha256-14addca4b92a47c806b83ebaeed593fc6672cd99f0017ed8dad759fe72ed0309"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-dev/images/sha256-f5e151060df04ff5fb59f5604b49cd371931bbe75b06aec9fe7781397c4be0ce"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-runtime/images/sha256-5cd4c03fdb1036570c0d4929da60a65c4466998dc80f1dc8a5a0b173eae017fb"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-dev/images/sha256-b3add80e374a2db2d1088d746e740afa89d439aca02cacba959ad298f5cd2b3f"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-runtime/images/sha256-3a244f026c32177eff7958ffbad390de85b438b2b48b455cc39f15d70fa1270d"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-dev/images/sha256-e0cecdfacb59169335049983cdab6da578c209bb9f4d08aad97e184ae59171a6"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-runtime/images/sha256-6f43de12f7eb202791b698ac51d28b72098de90034dbcd48486629b0125f7707"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
|
||||
Critical ROCm libraries for TensorFlow
|
||||
===============================================================================
|
||||
|
||||
TensorFlow depends on multiple components and the supported features of those
|
||||
components can affect the TensorFlow ROCm supported feature set. The versions
|
||||
in the following table refer to the first TensorFlow version where the ROCm
|
||||
library was introduced as a dependency. The versions described
|
||||
are available in ROCm :version:`rocm_version`.
|
||||
|
||||
.. list-table::
|
||||
:widths: 25, 10, 35, 30
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- Purpose
|
||||
- Used in
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Accelerates operations like ``tf.matmul``, ``tf.linalg.matmul``, and
|
||||
other matrix multiplications commonly used in neural network layers.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- Extends hipBLAS with additional optimizations like fused kernels and
|
||||
integer tensor cores.
|
||||
- Optimizes matrix multiplications and linear algebra operations used in
|
||||
layers like dense, convolutional, and RNNs in TensorFlow.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Supports operations like ``tf.reduce_sum``, ``tf.cumsum``, ``tf.sort``
|
||||
and other tensor operations in TensorFlow, especially those involving
|
||||
scanning, sorting, and filtering.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Accelerates Fast Fourier Transforms (FFT) for signal processing tasks.
|
||||
- Used for operations like signal processing, image filtering, and
|
||||
certain types of neural networks requiring FFT-based transformations.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated direct linear solvers for dense and sparse
|
||||
systems.
|
||||
- Optimizes linear algebra functions such as solving systems of linear
|
||||
equations, often used in optimization and training tasks.
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- Optimizes sparse matrix operations for efficient computations on sparse
|
||||
data.
|
||||
- Accelerates sparse matrix operations in models with sparse weight
|
||||
matrices or activations, commonly used in neural networks.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- Provides optimized deep learning primitives such as convolutions,
|
||||
pooling,
|
||||
normalization, and activation functions.
|
||||
- Speeds up convolutional neural networks (CNNs) and other layers. Used
|
||||
in TensorFlow for layers like ``tf.nn.conv2d``, ``tf.nn.relu``, and
|
||||
``tf.nn.lstm_cell``.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||
Broadcast.
|
||||
- Distributed data parallel training (``tf.distribute.MirroredStrategy``).
|
||||
Handles communication in multi-GPU setups.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
- Reduction operations like ``tf.reduce_sum``, ``tf.cumsum`` for computing
|
||||
the cumulative sum of elements along a given axis or ``tf.unique`` to
|
||||
finds unique elements in a tensor can use rocThrust.
|
||||
|
||||
Supported and unsupported features
|
||||
===============================================================================
|
||||
|
||||
The following section maps supported data types and GPU-accelerated TensorFlow
|
||||
features to their minimum supported ROCm and TensorFlow versions.
|
||||
|
||||
Data types
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The data type of a tensor is specified using the ``dtype`` attribute or
|
||||
argument, and TensorFlow supports a wide range of data types for different use
|
||||
cases.
|
||||
|
||||
The basic, single data types of `tf.dtypes <https://www.tensorflow.org/api_docs/python/tf/dtypes>`_
|
||||
are as follows:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
- Since ROCm
|
||||
* - ``bfloat16``
|
||||
- 16-bit bfloat (brain floating point).
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``bool``
|
||||
- Boolean.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``complex128``
|
||||
- 128-bit complex.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``complex64``
|
||||
- 64-bit complex.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``double``
|
||||
- 64-bit (double precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``float16``
|
||||
- 16-bit (half precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``float32``
|
||||
- 32-bit (single precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``float64``
|
||||
- 64-bit (double precision) floating-point.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``half``
|
||||
- 16-bit (half precision) floating-point.
|
||||
- 2.0.0
|
||||
- 2.0
|
||||
* - ``int16``
|
||||
- Signed 16-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``int32``
|
||||
- Signed 32-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``int64``
|
||||
- Signed 64-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``int8``
|
||||
- Signed 8-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``qint16``
|
||||
- Signed quantized 16-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``qint32``
|
||||
- Signed quantized 32-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``qint8``
|
||||
- Signed quantized 8-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``quint16``
|
||||
- Unsigned quantized 16-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``quint8``
|
||||
- Unsigned quantized 8-bit integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``resource``
|
||||
- Handle to a mutable, dynamically allocated resource.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``string``
|
||||
- Variable-length string, represented as byte array.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``uint16``
|
||||
- Unsigned 16-bit (word) integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``uint32``
|
||||
- Unsigned 32-bit (dword) integer.
|
||||
- 1.5.0
|
||||
- 1.7
|
||||
* - ``uint64``
|
||||
- Unsigned 64-bit (qword) integer.
|
||||
- 1.5.0
|
||||
- 1.7
|
||||
* - ``uint8``
|
||||
- Unsigned 8-bit (byte) integer.
|
||||
- 1.0.0
|
||||
- 1.7
|
||||
* - ``variant``
|
||||
- Data of arbitrary type (known at runtime).
|
||||
- 1.4.0
|
||||
- 1.7
|
||||
|
||||
Features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This table provides an overview of key features in TensorFlow and their
|
||||
availability in ROCm.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
- Since ROCm
|
||||
* - ``tf.linalg`` (Linear Algebra)
|
||||
- Operations for matrix and tensor computations, such as
|
||||
``tf.linalg.matmul`` (matrix multiplication), ``tf.linalg.inv``
|
||||
(matrix inversion) and ``tf.linalg.cholesky`` (Cholesky decomposition).
|
||||
These leverage GPUs for high-performance linear algebra operations.
|
||||
- 1.4
|
||||
- 1.8.2
|
||||
* - ``tf.nn`` (Neural Network Operations)
|
||||
- GPU-accelerated building blocks for deep learning models, such as 2D
|
||||
convolutions with ``tf.nn.conv2d``, max pooling operations with
|
||||
``tf.nn.max_pool``, activation functions like ``tf.nn.relu`` or softmax
|
||||
for output layers with ``tf.nn.softmax``.
|
||||
- 1.0
|
||||
- 1.8.2
|
||||
* - ``tf.image`` (Image Processing)
|
||||
- GPU-accelerated functions for image preprocessing and augmentations,
|
||||
such as resize images with ``tf.image.resize``, flip images horizontally
|
||||
with ``tf.image.flip_left_right`` and adjust image brightness randomly
|
||||
with ``tf.image.random_brightness``.
|
||||
- 1.1
|
||||
- 1.8.2
|
||||
* - ``tf.keras`` (High-Level API)
|
||||
- GPU acceleration for Keras layers and models, including dense layers
|
||||
(``tf.keras.layers.Dense``), convolutional layers
|
||||
(``tf.keras.layers.Conv2D``) and recurrent layers
|
||||
(``tf.keras.layers.LSTM``).
|
||||
- 1.4
|
||||
- 1.8.2
|
||||
* - ``tf.math`` (Mathematical Operations)
|
||||
- GPU-accelerated mathematical operations, such as sum across dimensions
|
||||
with ``tf.math.reduce_sum``, elementwise exponentiation with
|
||||
``tf.math.exp`` and sigmoid activation (``tf.math.sigmoid``).
|
||||
- 1.5
|
||||
- 1.8.2
|
||||
* - ``tf.signal`` (Signal Processing)
|
||||
- Functions for spectral analysis and signal transformations.
|
||||
- 1.13
|
||||
- 2.1
|
||||
* - ``tf.data`` (Data Input Pipeline)
|
||||
- GPU-accelerated data preprocessing for efficient input pipelines,
|
||||
Prefetching with ``tf.data.experimental.AUTOTUNE``. GPU-enabled
|
||||
transformations like map and batch.
|
||||
- 1.4
|
||||
- 1.8.2
|
||||
* - ``tf.distribute`` (Distributed Training)
|
||||
- Enabling to scale computations across multiple devices on a single
|
||||
machine or across multiple machines.
|
||||
- 1.13
|
||||
- 2.1
|
||||
* - ``tf.random`` (Random Number Generation)
|
||||
- GPU-accelerated random number generation
|
||||
- 1.12
|
||||
- 1.9.2
|
||||
* - ``tf.TensorArray`` (Dynamic Array Operations)
|
||||
- Enables dynamic tensor manipulation on GPUs.
|
||||
- 1.0
|
||||
- 1.8.2
|
||||
* - ``tf.sparse`` (Sparse Tensor Operations)
|
||||
- GPU-accelerated sparse matrix manipulations.
|
||||
- 1.9
|
||||
- 1.9.0
|
||||
* - ``tf.experimental.numpy``
|
||||
- GPU-accelerated NumPy-like API for numerical computations.
|
||||
- 2.4
|
||||
- 4.1.1
|
||||
* - ``tf.RaggedTensor``
|
||||
- Handling of variable-length sequences and ragged tensors with GPU
|
||||
support.
|
||||
- 1.13
|
||||
- 2.1
|
||||
* - ``tf.function`` with XLA (Accelerated Linear Algebra)
|
||||
- Enable GPU-accelerated functions in optimization.
|
||||
- 1.14
|
||||
- 2.4
|
||||
* - ``tf.quantization``
|
||||
- Quantized operations for inference, accelerated on GPUs.
|
||||
- 1.12
|
||||
- 1.9.2
|
||||
|
||||
Distributed library features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Enables developers to scale computations across multiple devices on a single machine or
|
||||
across multiple machines.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
- Since ROCm
|
||||
* - ``MultiWorkerMirroredStrategy``
|
||||
- Synchronous training across multiple workers using mirrored variables.
|
||||
- 2.0
|
||||
- 3.0
|
||||
* - ``MirroredStrategy``
|
||||
- Synchronous training across multiple GPUs on one machine.
|
||||
- 1.5
|
||||
- 2.5
|
||||
* - ``TPUStrategy``
|
||||
- Efficiently trains models on Google TPUs.
|
||||
- 1.9
|
||||
- ❌
|
||||
* - ``ParameterServerStrategy``
|
||||
- Asynchronous training using parameter servers for variable management.
|
||||
- 2.1
|
||||
- 4.0
|
||||
* - ``CentralStorageStrategy``
|
||||
- Keeps variables on a single device and performs computation on multiple
|
||||
devices.
|
||||
- 2.3
|
||||
- 4.1
|
||||
* - ``CollectiveAllReduceStrategy``
|
||||
- Synchronous training across multiple devices and hosts.
|
||||
- 1.14
|
||||
- 3.5
|
||||
* - Distribution Strategies API
|
||||
- High-level API to simplify distributed training configuration and
|
||||
execution.
|
||||
- 1.10
|
||||
- 3.0
|
||||
|
||||
Unsupported TensorFlow features
|
||||
===============================================================================
|
||||
|
||||
The following are GPU-accelerated TensorFlow features not currently supported by
|
||||
ROCm.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since TensorFlow
|
||||
* - Mixed Precision with TF32
|
||||
- Mixed precision with TF32 is used for matrix multiplications,
|
||||
convolutions, and other linear algebra operations, particularly in
|
||||
deep learning workloads like CNNs and transformers.
|
||||
- 2.4
|
||||
* - ``tf.distribute.TPUStrategy``
|
||||
- Efficiently trains models on Google TPUs.
|
||||
- 1.9
|
||||
|
||||
Use cases and recommendations
|
||||
===============================================================================
|
||||
|
||||
* The `Training a Neural Collaborative Filtering (NCF) Recommender on an AMD
|
||||
GPU <https://rocm.blogs.amd.com/artificial-intelligence/ncf/README.html>`_
|
||||
blog post discusses training an NCF recommender system using TensorFlow. It
|
||||
explains how NCF improves traditional collaborative filtering methods by
|
||||
leveraging neural networks to model non-linear user-item interactions. The
|
||||
post outlines the implementation using the recommenders library, focusing on
|
||||
the use of implicit data (for example, user interactions like viewing or
|
||||
purchasing) and how it addresses challenges like the lack of negative values.
|
||||
|
||||
* The `Creating a PyTorch/TensorFlow code environment on AMD GPUs
|
||||
<https://rocm.blogs.amd.com/software-tools-optimization/pytorch-tensorflow-env/README.html>`_
|
||||
blog post provides instructions for creating a machine learning environment
|
||||
for PyTorch and TensorFlow on AMD GPUs using ROCm. It covers steps like
|
||||
installing the libraries, cloning code repositories, installing dependencies,
|
||||
and troubleshooting potential issues with CUDA-based code. Additionally, it
|
||||
explains how to HIPify code (port CUDA code to HIP) and manage Docker images
|
||||
for a better experience on AMD GPUs. This guide aims to help data scientists
|
||||
and ML practitioners adapt their code for AMD GPUs.
|
||||
|
||||
For more use cases and recommendations, see the `ROCm Tensorflow blog posts <https://rocm.blogs.amd.com/blog/tag/tensorflow.html>`_.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,407 +0,0 @@
|
||||
.. meta::
|
||||
:description: Using CMake
|
||||
:keywords: CMake, dependencies, HIP, C++, AMD, ROCm
|
||||
|
||||
*********************************
|
||||
Using CMake
|
||||
*********************************
|
||||
|
||||
Most components in ROCm support CMake. Projects depending on header-only or
|
||||
library components typically require CMake 3.5 or higher whereas those wanting
|
||||
to make use of the CMake HIP language support will require CMake 3.21 or higher.
|
||||
|
||||
Finding dependencies
|
||||
====================
|
||||
|
||||
.. note::
|
||||
|
||||
For a complete
|
||||
reference on how to deal with dependencies in CMake, refer to the CMake docs
|
||||
on `find_package
|
||||
<https://cmake.org/cmake/help/latest/command/find_package.html>`_ and the
|
||||
`Using Dependencies Guide
|
||||
<https://cmake.org/cmake/help/latest/guide/using-dependencies/index.html>`_
|
||||
to get an overview of CMake related facilities.
|
||||
|
||||
In short, CMake supports finding dependencies in two ways:
|
||||
|
||||
* In Module mode, it consults a file ``Find<PackageName>.cmake`` which tries to find the component
|
||||
in typical install locations and layouts. CMake ships a few dozen such scripts, but users and projects
|
||||
may ship them as well.
|
||||
|
||||
* In Config mode, it locates a file named ``<packagename>-config.cmake`` or
|
||||
``<PackageName>Config.cmake`` which describes the installed component in all regards needed to
|
||||
consume it.
|
||||
|
||||
ROCm predominantly relies on Config mode, one notable exception being the Module
|
||||
driving the compilation of HIP programs on NVIDIA runtimes. As such, when
|
||||
dependencies are not found in standard system locations, one either has to
|
||||
instruct CMake to search for package config files in additional folders using
|
||||
the ``CMAKE_PREFIX_PATH`` variable (a semi-colon separated list of file system
|
||||
paths), or using ``<PackageName>_ROOT`` variable on a project-specific basis.
|
||||
|
||||
There are nearly a dozen ways to set these variables. One may be more convenient
|
||||
over the other depending on your workflow. Conceptually the simplest is adding
|
||||
it to your CMake configuration command on the command line via
|
||||
``-D CMAKE_PREFIX_PATH=....`` . AMD packaged ROCm installs can typically be
|
||||
added to the config file search paths such as:
|
||||
|
||||
* Windows: ``-D CMAKE_PREFIX_PATH=${env:HIP_PATH}``
|
||||
|
||||
* Linux: ``-D CMAKE_PREFIX_PATH=/opt/rocm``
|
||||
|
||||
ROCm provides the respective *config-file* packages, and this enables
|
||||
``find_package`` to be used directly. ROCm does not require any Find module as
|
||||
the *config-file* packages are shipped with the upstream projects, such as
|
||||
rocPRIM and other ROCm libraries.
|
||||
|
||||
For a complete guide on where and how ROCm may be installed on a system, refer
|
||||
to the installation guides for
|
||||
`Linux <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html>`_
|
||||
and
|
||||
`Windows <https://rocm.docs.amd.com/projects/install-on-windows/en/latest/index.html>`_.
|
||||
|
||||
Using HIP in CMake
|
||||
==================
|
||||
|
||||
ROCm components providing a C/C++ interface support consumption via any
|
||||
C/C++ toolchain that CMake knows how to drive. ROCm also supports the CMake HIP
|
||||
language features, allowing users to program using the HIP single-source
|
||||
programming model. When a program (or translation-unit) uses the HIP API without
|
||||
compiling any GPU device code, HIP can be treated in CMake as a simple C/C++
|
||||
library.
|
||||
|
||||
Using the HIP single-source programming model
|
||||
---------------------------------------------
|
||||
|
||||
Source code written in the HIP dialect of C++ typically uses the `.hip`
|
||||
extension. When the HIP CMake language is enabled, it will automatically
|
||||
associate such source files with the HIP toolchain being used.
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
cmake_minimum_required(VERSION 3.21) # HIP language support requires 3.21
|
||||
cmake_policy(VERSION 3.21.3...3.27)
|
||||
project(MyProj LANGUAGES HIP)
|
||||
add_executable(MyApp Main.hip)
|
||||
|
||||
Should you have existing CUDA code that is from the source compatible subset of
|
||||
HIP, you can tell CMake that despite their `.cu` extension, they're HIP sources.
|
||||
Do note that this mostly facilitates compiling kernel code-only source files,
|
||||
as host-side CUDA API won't compile in this fashion.
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
add_library(MyLib MyLib.cu)
|
||||
set_source_files_properties(MyLib.cu PROPERTIES LANGUAGE HIP)
|
||||
|
||||
CMake itself only hosts part of the HIP language support, such as defining
|
||||
HIP-specific properties, etc. while the other half ships with the HIP
|
||||
implementation, such as ROCm. CMake will search for a file
|
||||
`hip-lang-config.cmake` describing how the the properties defined by CMake
|
||||
translate to toolchain invocations. If one installs ROCm using non-standard
|
||||
methods or layouts and CMake can't locate this file or detect parts of the SDK,
|
||||
there's a catch-all, last resort variable consulted locating this file,
|
||||
``-D CMAKE_HIP_COMPILER_ROCM_ROOT:PATH=`` which should be set the root of the
|
||||
ROCm installation.
|
||||
|
||||
.. note::
|
||||
Imported targets defined by `hip-lang-config.cmake` are for internal use
|
||||
only.
|
||||
|
||||
If the user doesn't provide a semi-colon delimited list of device architectures
|
||||
via ``CMAKE_HIP_ARCHITECTURES``, CMake will select some sensible default. It is
|
||||
advised though that if a user knows what devices they wish to target, then set
|
||||
this variable explicitly.
|
||||
|
||||
Consuming ROCm C/C++ libraries
|
||||
------------------------------
|
||||
|
||||
Libraries such as rocBLAS, rocFFT, MIOpen, etc. behave as C/C++ libraries.
|
||||
Illustrated in the example below is a C++ application using MIOpen from CMake.
|
||||
It calls ``find_package(miopen)``, which provides the ``MIOpen`` imported
|
||||
target. This can be linked with ``target_link_libraries``
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
cmake_minimum_required(VERSION 3.5) # find_package(miopen) requires 3.5
|
||||
cmake_policy(VERSION 3.5...3.27)
|
||||
project(MyProj LANGUAGES CXX)
|
||||
find_package(miopen)
|
||||
add_library(MyLib ...)
|
||||
target_link_libraries(MyLib PUBLIC MIOpen)
|
||||
|
||||
.. note::
|
||||
|
||||
Most libraries are designed as host-only API, so using a GPU device
|
||||
compiler is not necessary for downstream projects unless they use GPU device
|
||||
code.
|
||||
|
||||
Consuming the HIP API in C++ code
|
||||
---------------------------------
|
||||
|
||||
Consuming the HIP API without compiling single-source GPU device code can be
|
||||
done using any C++ compiler. The ``find_package(hip)`` provides the
|
||||
``hip::host`` imported target to use HIP in this scenario.
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
cmake_minimum_required(VERSION 3.5) # find_package(hip) requires 3.5
|
||||
cmake_policy(VERSION 3.5...3.27)
|
||||
project(MyProj LANGUAGES CXX)
|
||||
find_package(hip REQUIRED)
|
||||
add_executable(MyApp ...)
|
||||
target_link_libraries(MyApp PRIVATE hip::host)
|
||||
|
||||
When mixing such ``CXX`` sources with ``HIP`` sources holding device-code, link
|
||||
only to `hip::host`. If HIP sources don't have `.hip` as their extension, use
|
||||
`set_source_files_properties(<hip_sources>... PROPERTIES LANGUAGE HIP)` on them.
|
||||
Linking to `hip::host` will set all the necessary flags for the ``CXX`` sources
|
||||
while ``HIP`` sources inherit all flags from the built-in language support.
|
||||
Having HIP sources in a target will turn the |LINK_LANG|_ into ``HIP``.
|
||||
|
||||
.. |LINK_LANG| replace:: ``LINKER_LANGUAGE``
|
||||
.. _LINK_LANG: https://cmake.org/cmake/help/latest/prop_tgt/LINKER_LANGUAGE.html
|
||||
|
||||
Compiling device code in C++ language mode
|
||||
------------------------------------------
|
||||
|
||||
.. attention::
|
||||
|
||||
The workflow detailed here is considered legacy and is shown for
|
||||
understanding's sake. It pre-dates the existence of HIP language support in
|
||||
CMake. If source code has HIP device code in it, it is a HIP source file
|
||||
and should be compiled as such. Only resort to the method below if your
|
||||
HIP-enabled CMake code path can't mandate CMake version 3.21.
|
||||
|
||||
If code uses the HIP API and compiles GPU device code, it requires using a
|
||||
device compiler. The compiler for CMake can be set using either the
|
||||
``CMAKE_C_COMPILER`` and ``CMAKE_CXX_COMPILER`` variable or using the ``CC``
|
||||
and ``CXX`` environment variables. This can be set when configuring CMake or
|
||||
put into a CMake toolchain file. The device compiler must be set to a
|
||||
compiler that supports AMD GPU targets, which is usually Clang.
|
||||
|
||||
The ``find_package(hip)`` provides the ``hip::device`` imported target to add
|
||||
all the flags necessary for device compilation.
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
cmake_minimum_required(VERSION 3.8) # cxx_std_11 requires 3.8
|
||||
cmake_policy(VERSION 3.8...3.27)
|
||||
project(MyProj LANGUAGES CXX)
|
||||
find_package(hip REQUIRED)
|
||||
add_library(MyLib ...)
|
||||
target_link_libraries(MyLib PRIVATE hip::device)
|
||||
target_compile_features(MyLib PRIVATE cxx_std_11)
|
||||
|
||||
.. note::
|
||||
|
||||
Compiling for the GPU device requires at least C++11.
|
||||
|
||||
This project can then be configured with the following CMake commands:
|
||||
|
||||
* Windows: ``cmake -D CMAKE_CXX_COMPILER:PATH=${env:HIP_PATH}\bin\clang++.exe``
|
||||
* Linux: ``cmake -D CMAKE_CXX_COMPILER:PATH=/opt/rocm/bin/amdclang++``
|
||||
|
||||
Which use the device compiler provided from the binary packages of
|
||||
`ROCm HIP SDK <https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html>`_ and
|
||||
`repo.radeon.com <https://repo.radeon.com>`_ respectively.
|
||||
|
||||
When using the ``CXX`` language support to compile HIP device code, selecting the
|
||||
target GPU architectures is done via setting the ``GPU_TARGETS`` variable.
|
||||
``CMAKE_HIP_ARCHITECTURES`` only exists when the HIP language is enabled. By
|
||||
default, this is set to some subset of the currently supported architectures of
|
||||
AMD ROCm. It can be set to the CMake option ``-D GPU_TARGETS="gfx1032;gfx1035"``.
|
||||
|
||||
ROCm CMake packages
|
||||
-------------------
|
||||
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| Component | Package | Targets |
|
||||
+===========+==========+========================================================+
|
||||
| HIP | hip | ``hip::host``, ``hip::device`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocPRIM | rocprim | ``roc::rocprim`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocThrust | rocthrust| ``roc::rocthrust`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| hipCUB | hipcub | ``hip::hipcub`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocRAND | rocrand | ``roc::rocrand`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocBLAS | rocblas | ``roc::rocblas`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocSOLVER | rocsolver| ``roc::rocsolver`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| hipBLAS | hipblas | ``roc::hipblas`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocFFT | rocfft | ``roc::rocfft`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| hipFFT | hipfft | ``hip::hipfft`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocSPARSE | rocsparse| ``roc::rocsparse`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| hipSPARSE | hipsparse| ``roc::hipsparse`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| rocALUTION|rocalution| ``roc::rocalution`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| RCCL | rccl | ``rccl`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| MIOpen | miopen | ``MIOpen`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
| MIGraphX | migraphx | ``migraphx::migraphx``, ``migraphx::migraphx_c``, |
|
||||
| | | ``migraphx::migraphx_cpu``, ``migraphx::migraphx_gpu``,|
|
||||
| | | ``migraphx::migraphx_onnx``, ``migraphx::migraphx_tf`` |
|
||||
+-----------+----------+--------------------------------------------------------+
|
||||
|
||||
Using CMake presets
|
||||
===================
|
||||
|
||||
CMake command lines depending on how specific users like to be when compiling
|
||||
code can grow to unwieldy lengths. This is the primary reason why projects tend
|
||||
to bake script snippets into their build definitions controlling compiler
|
||||
warning levels, changing CMake defaults (``CMAKE_BUILD_TYPE`` or
|
||||
``BUILD_SHARED_LIBS`` just to name a few) and all sorts anti-patterns, all in
|
||||
the name of convenience.
|
||||
|
||||
Load on the command-line interface (CLI) starts immediately by selecting a
|
||||
toolchain, the set of utilities used to compile programs. To ease some of the
|
||||
toolchain related pains, CMake does consult the ``CC`` and ``CXX`` environmental
|
||||
variables when setting a default ``CMAKE_C[XX]_COMPILER`` respectively, but that
|
||||
is just the tip of the iceberg. There's a fair number of variables related to
|
||||
just the toolchain itself (typically supplied using
|
||||
`toolchain files <https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html>`_
|
||||
), and then we still haven't talked about user preference or project-specific
|
||||
options.
|
||||
|
||||
IDEs supporting CMake (Visual Studio, Visual Studio Code, CLion, etc.) all came
|
||||
up with their own way to register command-line fragments of different purpose in
|
||||
a setup-and-forget fashion for quick assembly using graphical front-ends. This is
|
||||
all nice, but configurations aren't portable, nor can they be reused in
|
||||
Continuous Integration (CI) pipelines. CMake has condensed existing practice
|
||||
into a portable JSON format that works in all IDEs and can be invoked from any
|
||||
command line. This is
|
||||
`CMake Presets <https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html>`_.
|
||||
|
||||
There are two types of preset files: one supplied by the project, called
|
||||
``CMakePresets.json`` which is meant to be committed to version control,
|
||||
typically used to drive CI; and one meant for the user to provide, called
|
||||
``CMakeUserPresets.json``, typically used to house user preference and adapting
|
||||
the build to the user's environment. These JSON files are allowed to include
|
||||
other JSON files and the user presets always implicitly includes the non-user
|
||||
variant.
|
||||
|
||||
Using HIP with presets
|
||||
----------------------
|
||||
|
||||
Following is an example ``CMakeUserPresets.json`` file which actually compiles
|
||||
the `amd/rocm-examples <https://github.com/amd/rocm-examples>`_ suite of sample
|
||||
applications on a typical ROCm installation:
|
||||
|
||||
.. code-block:: json
|
||||
|
||||
{
|
||||
"version": 3,
|
||||
"cmakeMinimumRequired": {
|
||||
"major": 3,
|
||||
"minor": 21,
|
||||
"patch": 0
|
||||
},
|
||||
"configurePresets": [
|
||||
{
|
||||
"name": "layout",
|
||||
"hidden": true,
|
||||
"binaryDir": "${sourceDir}/build/${presetName}",
|
||||
"installDir": "${sourceDir}/install/${presetName}"
|
||||
},
|
||||
{
|
||||
"name": "generator-ninja-multi-config",
|
||||
"hidden": true,
|
||||
"generator": "Ninja Multi-Config"
|
||||
},
|
||||
{
|
||||
"name": "toolchain-makefiles-c/c++-amdclang",
|
||||
"hidden": true,
|
||||
"cacheVariables": {
|
||||
"CMAKE_C_COMPILER": "/opt/rocm/bin/amdclang",
|
||||
"CMAKE_CXX_COMPILER": "/opt/rocm/bin/amdclang++",
|
||||
"CMAKE_HIP_COMPILER": "/opt/rocm/bin/amdclang++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "clang-strict-iso-high-warn",
|
||||
"hidden": true,
|
||||
"cacheVariables": {
|
||||
"CMAKE_C_FLAGS": "-Wall -Wextra -pedantic",
|
||||
"CMAKE_CXX_FLAGS": "-Wall -Wextra -pedantic",
|
||||
"CMAKE_HIP_FLAGS": "-Wall -Wextra -pedantic"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "ninja-mc-rocm",
|
||||
"displayName": "Ninja Multi-Config ROCm",
|
||||
"inherits": [
|
||||
"layout",
|
||||
"generator-ninja-multi-config",
|
||||
"toolchain-makefiles-c/c++-amdclang",
|
||||
"clang-strict-iso-high-warn"
|
||||
]
|
||||
}
|
||||
],
|
||||
"buildPresets": [
|
||||
{
|
||||
"name": "ninja-mc-rocm-debug",
|
||||
"displayName": "Debug",
|
||||
"configuration": "Debug",
|
||||
"configurePreset": "ninja-mc-rocm"
|
||||
},
|
||||
{
|
||||
"name": "ninja-mc-rocm-release",
|
||||
"displayName": "Release",
|
||||
"configuration": "Release",
|
||||
"configurePreset": "ninja-mc-rocm"
|
||||
},
|
||||
{
|
||||
"name": "ninja-mc-rocm-debug-verbose",
|
||||
"displayName": "Debug (verbose)",
|
||||
"configuration": "Debug",
|
||||
"configurePreset": "ninja-mc-rocm",
|
||||
"verbose": true
|
||||
},
|
||||
{
|
||||
"name": "ninja-mc-rocm-release-verbose",
|
||||
"displayName": "Release (verbose)",
|
||||
"configuration": "Release",
|
||||
"configurePreset": "ninja-mc-rocm",
|
||||
"verbose": true
|
||||
}
|
||||
],
|
||||
"testPresets": [
|
||||
{
|
||||
"name": "ninja-mc-rocm-debug",
|
||||
"displayName": "Debug",
|
||||
"configuration": "Debug",
|
||||
"configurePreset": "ninja-mc-rocm",
|
||||
"execution": {
|
||||
"jobs": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "ninja-mc-rocm-release",
|
||||
"displayName": "Release",
|
||||
"configuration": "Release",
|
||||
"configurePreset": "ninja-mc-rocm",
|
||||
"execution": {
|
||||
"jobs": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
.. note::
|
||||
|
||||
Getting presets to work reliably on Windows requires some CMake improvements
|
||||
and/or support from compiler vendors. (Refer to
|
||||
`Add support to the Visual Studio generators <https://gitlab.kitware.com/cmake/cmake/-/issues/24245>`_
|
||||
and `Sourcing environment scripts <https://gitlab.kitware.com/cmake/cmake/-/issues/21619>`_
|
||||
.)
|
||||
@@ -1,14 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="AMD ROCm documentation">
|
||||
<meta name="keywords" content="documentation, guides, installation, compatibility, support,
|
||||
reference, ROCm, AMD">
|
||||
</head>
|
||||
|
||||
# Using compiler features
|
||||
|
||||
The following topics describe using specific features of the compilation tools:
|
||||
|
||||
* [ROCm compiler infrastructure](https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html)
|
||||
* [Using AddressSanitizer](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html)
|
||||
* [OpenMP support](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html)
|
||||
@@ -1,172 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="ROCm Linux Filesystem Hierarchy Standard reorganization">
|
||||
<meta name="keywords" content="FHS, Linux Filesystem Hierarchy Standard, directory structure,
|
||||
AMD, ROCm">
|
||||
</head>
|
||||
|
||||
# ROCm Linux Filesystem Hierarchy Standard reorganization
|
||||
|
||||
## Introduction
|
||||
|
||||
The ROCm Software has adopted the Linux Filesystem Hierarchy Standard (FHS) [https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html](https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html) in order to to ensure ROCm is consistent with standard open source conventions. The following sections specify how current and future releases of ROCm adhere to FHS, how the previous ROCm file system is supported, and how improved versioning specifications are applied to ROCm.
|
||||
|
||||
## Adopting the FHS
|
||||
|
||||
In order to standardize ROCm directory structure and directory content layout ROCm has adopted the [FHS](https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html), adhering to open source conventions for Linux-based distribution. FHS ensures internal consistency within the ROCm stack, as well as external consistency with other systems and distributions. The ROCm proposed file structure is outlined below:
|
||||
|
||||
```none
|
||||
/opt/rocm-<ver>
|
||||
| -- bin
|
||||
| -- all public binaries
|
||||
| -- lib
|
||||
| -- lib<soname>.so->lib<soname>.so.major->lib<soname>.so.major.minor.patch
|
||||
(public libaries to link with applications)
|
||||
| -- <component>
|
||||
| -- architecture dependent libraries and binaries used internally by components
|
||||
| -- cmake
|
||||
| -- <component>
|
||||
| --<component>-config.cmake
|
||||
| -- libexec
|
||||
| -- <component>
|
||||
| -- non ISA/architecture independent executables used internally by components
|
||||
| -- include
|
||||
| -- <component>
|
||||
| -- public header files
|
||||
| -- share
|
||||
| -- html
|
||||
| -- <component>
|
||||
| -- html documentation
|
||||
| -- info
|
||||
| -- <component>
|
||||
| -- info files
|
||||
| -- man
|
||||
| -- <component>
|
||||
| -- man pages
|
||||
| -- doc
|
||||
| -- <component>
|
||||
| -- license files
|
||||
| -- <component>
|
||||
| -- samples
|
||||
| -- architecture independent misc files
|
||||
```
|
||||
|
||||
## Changes from earlier ROCm versions
|
||||
|
||||
The following table provides a brief overview of the new ROCm FHS layout, compared to the layout of earlier ROCm versions. Note that /opt/ is used to denote the default rocm-installation-path and should be replaced in case of a non-standard installation location of the ROCm distribution.
|
||||
|
||||
```none
|
||||
______________________________________________________
|
||||
| New ROCm Layout | Previous ROCm Layout |
|
||||
|_____________________________|________________________|
|
||||
| /opt/rocm-<ver> | /opt/rocm-<ver> |
|
||||
| | -- bin | | -- bin |
|
||||
| | -- lib | | -- lib |
|
||||
| | -- cmake | | -- include |
|
||||
| | -- libexec | | -- <component_1> |
|
||||
| | -- include | | -- bin |
|
||||
| | -- <component_1> | | -- cmake |
|
||||
| | -- share | | -- doc |
|
||||
| | -- html | | -- lib |
|
||||
| | -- info | | -- include |
|
||||
| | -- man | | -- samples |
|
||||
| | -- doc | | -- <component_n> |
|
||||
| | -- <component_1> | | -- bin |
|
||||
| | -- samples | | -- cmake |
|
||||
| | -- .. | | -- doc |
|
||||
| | -- <component_n> | | -- lib |
|
||||
| | -- samples | | -- include |
|
||||
| | -- .. | | -- samples |
|
||||
|______________________________________________________|
|
||||
```
|
||||
|
||||
## ROCm FHS reorganization: backward compatibility
|
||||
|
||||
The FHS file organization for ROCm was first introduced in the release of ROCm 5.2 . Backward compatibility was implemented to make sure users could still run their ROCm applications while transitioning to the new FHS. ROCm has moved header files and libraries to their new locations as indicated in the above structure, and included symbolic-links and wrapper header files in their old location for backward compatibility. The following sections detail ROCm backward compatibility implementation for wrapper header files, executable files, library files and CMake config files.
|
||||
|
||||
### Wrapper header files
|
||||
|
||||
Wrapper header files are placed in the old location (
|
||||
`/opt/rocm-<ver>/<component>/include`) with a warning message to include files
|
||||
from the new location (`/opt/rocm-<ver>/include`) as shown in the example below.
|
||||
|
||||
```cpp
|
||||
#pragma message "This file is deprecated. Use file from include path /opt/rocm-ver/include/ and prefix with hip."
|
||||
#include <hip/hip_runtime.h>
|
||||
```
|
||||
|
||||
* Starting at ROCm 5.2 release, the deprecation for backward compatibility wrapper header files is: `#pragma` message announcing `#warning`.
|
||||
* Starting from ROCm 6.0 (tentatively) backward compatibility for wrapper header files will be removed, and the `#pragma` message will be announcing `#error`.
|
||||
|
||||
### Executable files
|
||||
|
||||
Executable files are available in the `/opt/rocm-<ver>/bin` folder. For backward
|
||||
compatibility, the old library location (`/opt/rocm-<ver>/<component>/bin`) has a
|
||||
soft link to the library at the new location. Soft links will be removed in a
|
||||
future release, tentatively ROCm v6.0.
|
||||
|
||||
```bash
|
||||
$ ls -l /opt/rocm/hip/bin/
|
||||
lrwxrwxrwx 1 root root 24 Jan 1 23:32 hipcc -> ../../bin/hipcc
|
||||
```
|
||||
|
||||
### Library files
|
||||
|
||||
Library files are available in the `/opt/rocm-<ver>/lib` folder. For backward
|
||||
compatibility, the old library location (`/opt/rocm-<ver>/<component>/lib`) has a
|
||||
soft link to the library at the new location. Soft links will be removed in a
|
||||
future release, tentatively ROCm v6.0.
|
||||
|
||||
```shell
|
||||
$ ls -l /opt/rocm/hip/lib/
|
||||
drwxr-xr-x 4 root root 4096 Jan 1 10:45 cmake
|
||||
lrwxrwxrwx 1 root root 24 Jan 1 23:32 libamdhip64.so -> ../../lib/libamdhip64.so
|
||||
```
|
||||
|
||||
### CMake config files
|
||||
|
||||
All CMake configuration files are available in the
|
||||
`/opt/rocm-<ver>/lib/cmake/<component>` folder. For backward compatibility, the
|
||||
old CMake locations (`/opt/rocm-<ver>/<component>/lib/cmake`) consist of a soft
|
||||
link to the new CMake config. Soft links will be removed in a future release,
|
||||
tentatively ROCm v6.0.
|
||||
|
||||
```shell
|
||||
$ ls -l /opt/rocm/hip/lib/cmake/hip/
|
||||
lrwxrwxrwx 1 root root 42 Jan 1 23:32 hip-config.cmake -> ../../../../lib/cmake/hip/hip-config.cmake
|
||||
```
|
||||
|
||||
## Changes required in applications using ROCm
|
||||
|
||||
Applications using ROCm are advised to use the new file paths. As the old files
|
||||
will be deprecated in a future release. Applications have to make sure to include
|
||||
correct header file and use correct search paths.
|
||||
|
||||
1. `#include<header_file.h>` needs to be changed to
|
||||
`#include <component/header_file.h>`
|
||||
|
||||
For example: `#include <hip.h>` needs to change
|
||||
to `#include <hip/hip.h>`
|
||||
|
||||
2. Any variable in CMake or Makefiles pointing to component folder needs to
|
||||
changed.
|
||||
|
||||
For example: `VAR1=/opt/rocm/hip` needs to be changed to `VAR1=/opt/rocm`
|
||||
`VAR2=/opt/rocm/hsa` needs to be changed to `VAR2=/opt/rocm`
|
||||
|
||||
3. Any reference to `/opt/rocm/<component>/bin` or `/opt/rocm/<component>/lib`
|
||||
needs to be changed to `/opt/rocm/bin` and `/opt/rocm/lib/`, respectively.
|
||||
|
||||
## Changes in versioning specifications
|
||||
|
||||
In order to better manage ROCm dependencies specification and allow smoother releases of ROCm while avoiding dependency conflicts, ROCm software shall adhere to the following scheme when numbering and incrementing ROCm files versions:
|
||||
|
||||
rocm-\<ver\>, where \<ver\> = \<x.y.z\>
|
||||
|
||||
x.y.z denote: MAJOR.MINOR.PATCH
|
||||
|
||||
z: PATCH - increment z when implementing backward compatible bug fixes.
|
||||
|
||||
y: MINOR - increment y when implementing minor changes that add functionality but are still backward compatible.
|
||||
|
||||
x: MAJOR - increment x when implementing major changes that are not backward compatible.
|
||||
@@ -1,72 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="GPU architecture">
|
||||
<meta name="keywords" content="GPU architecture, architecture support, MI200, MI250, RDNA,
|
||||
MI100, AMD Instinct">
|
||||
</head>
|
||||
|
||||
(gpu-arch-documentation)=
|
||||
|
||||
# GPU architecture documentation
|
||||
|
||||
:::::{grid} 1 1 2 2
|
||||
:gutter: 1
|
||||
|
||||
:::{grid-item-card}
|
||||
**AMD Instinct MI300 series**
|
||||
|
||||
Review hardware aspects of the AMD Instinct™ MI300 series of GPU accelerators and the CDNA™ 3
|
||||
architecture.
|
||||
|
||||
* [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md)
|
||||
* [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf)
|
||||
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf)
|
||||
* [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
**AMD Instinct MI200 series**
|
||||
|
||||
Review hardware aspects of the AMD Instinct™ MI200 series of GPU accelerators and the CDNA™ 2
|
||||
architecture.
|
||||
|
||||
* [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md)
|
||||
* [AMD Instinct MI200/CDNA2 ISA](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf)
|
||||
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf)
|
||||
* [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
|
||||
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
**AMD Instinct MI100**
|
||||
|
||||
Review hardware aspects of the AMD Instinct™ MI100 series of GPU accelerators and the CDNA™ 1
|
||||
architecture.
|
||||
|
||||
* [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md)
|
||||
* [AMD Instinct MI100/CDNA1 ISA](https://www.amd.com/system/files/TechDocs/instinct-mi100-cdna1-shader-instruction-set-architecture%C2%A0.pdf)
|
||||
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf)
|
||||
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
**RDNA**
|
||||
|
||||
* [AMD RDNA4 ISA](https://www.amd.com/content/dam/amd/en/documents/radeon-tech-docs/instruction-set-architectures/rdna4-instruction-set-architecture.pdf)
|
||||
* [AMD RDNA3 ISA](https://www.amd.com/system/files/TechDocs/rdna3-shader-instruction-set-architecture-feb-2023_0.pdf)
|
||||
* [AMD RDNA2 ISA](https://www.amd.com/system/files/TechDocs/rdna2-shader-instruction-set-architecture.pdf)
|
||||
* [AMD RDNA ISA](https://www.amd.com/system/files/TechDocs/rdna-shader-instruction-set-architecture.pdf)
|
||||
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
**Older architectures**
|
||||
|
||||
* [AMD Instinct MI50/Vega 7nm ISA](https://www.amd.com/system/files/TechDocs/vega-7nm-shader-instruction-set-architecture.pdf)
|
||||
* [AMD Instinct MI25/Vega ISA](https://www.amd.com/system/files/TechDocs/vega-shader-instruction-set-architecture.pdf)
|
||||
* [AMD GCN3 ISA](https://www.amd.com/system/files/TechDocs/gcn3-instruction-set-architecture.pdf)
|
||||
* [AMD Vega Architecture White Paper](https://en.wikichip.org/w/images/a/a1/vega-whitepaper.pdf)
|
||||
|
||||
:::
|
||||
|
||||
:::::
|
||||
@@ -1,95 +0,0 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Learn about the AMD Instinct MI100 series architecture."
|
||||
"keywords": "Instinct, MI100, microarchitecture, AMD, ROCm"
|
||||
---
|
||||
|
||||
# AMD Instinct™ MI100 microarchitecture
|
||||
|
||||
The following image shows the node-level architecture of a system that
|
||||
comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ accelerators.
|
||||
The two EPYC processors are connected to each other with the AMD Infinity™
|
||||
fabric which provides a high-bandwidth (up to 18 GT/sec) and coherent links such
|
||||
that each processor can access the available node memory as a single
|
||||
shared-memory domain in a non-uniform memory architecture (NUMA) fashion. In a
|
||||
2P, or dual-socket, configuration, three AMD Infinity™ fabric links are
|
||||
available to connect the processors plus one PCIe Gen 4 x16 link per processor
|
||||
can attach additional I/O devices such as the host adapters for the network
|
||||
fabric.
|
||||
|
||||

|
||||
|
||||
In a typical node configuration, each processor can host up to four AMD
|
||||
Instinct™ accelerators that are attached using PCIe Gen 4 links at 16 GT/sec,
|
||||
which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive
|
||||
of four accelerators can participate in a fully connected, coherent AMD
|
||||
Instinct™ fabric that connects the four accelerators using 23 GT/sec AMD
|
||||
Infinity fabric links that run at a higher frequency than the inter-processor
|
||||
links. This inter-GPU link can be established in certified server systems if the
|
||||
GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity
|
||||
Fabric™ bridge for the AMD Instinct™ accelerators.
|
||||
|
||||
## Microarchitecture
|
||||
|
||||
The microarchitecture of the AMD Instinct accelerators is based on the AMD CDNA
|
||||
architecture, which targets compute applications such as high-performance
|
||||
computing (HPC) and AI & machine learning (ML) that run on everything from
|
||||
individual servers to the world's largest exascale supercomputers. The overall
|
||||
system architecture is designed for extreme scalability and compute performance.
|
||||
|
||||
")
|
||||
|
||||
The above image shows the AMD Instinct accelerator with its PCIe Gen 4 x16
|
||||
link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host
|
||||
processor(s). It also shows the three AMD Infinity Fabric ports that provide
|
||||
high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local
|
||||
hive.
|
||||
|
||||
On the left and right of the floor plan, the High Bandwidth Memory (HBM)
|
||||
attaches via the GPU memory controller. The MI100 generation of the AMD
|
||||
Instinct accelerator offers four stacks of HBM generation 2 (HBM2) for a total
|
||||
of 32GB with a 4,096bit-wide memory interface. The peak memory bandwidth of the
|
||||
attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz.
|
||||
|
||||
The execution units of the GPU are depicted in the above image as Compute
|
||||
Units (CU). There are a total 120 compute units that are physically organized
|
||||
into eight Shader Engines (SE) with fifteen compute units per shader engine.
|
||||
Each compute unit is further sub-divided into four SIMD units that process SIMD
|
||||
instructions of 16 data elements per instruction. This enables the CU to process
|
||||
64 data elements (a so-called 'wavefront') at a peak clock frequency of 1.5 GHz.
|
||||
Therefore, the theoretical maximum FP64 peak performance is 11.5 TFLOPS
|
||||
(`4 [SIMD units] x 16 [elements per instruction] x 120 [CU] x 1.5 [GHz]`).
|
||||
|
||||

|
||||
|
||||
The preceding image shows the block diagram of a single CU of an AMD Instinct™
|
||||
MI100 accelerator and summarizes how instructions flow through the execution
|
||||
engines. The CU fetches the instructions via a 32KB instruction cache and moves
|
||||
them forward to execution via a dispatcher. The CU can handle up to ten
|
||||
wavefronts at a time and feed their instructions into the execution unit. The
|
||||
execution unit contains 256 vector general-purpose registers (VGPR) and 800
|
||||
scalar general-purpose registers (SGPR). The VGPR and SGPR are dynamically
|
||||
allocated to the executing wavefronts. A wavefront can access a maximum of 102
|
||||
scalar registers. Excess scalar-register usage will cause register spilling and
|
||||
thus may affect execution performance.
|
||||
|
||||
A wavefront can occupy any number of VGPRs from 0 to 256, directly affecting
|
||||
occupancy; that is, the number of concurrently active wavefronts in the CU. For
|
||||
instance, with 119 VGPRs used, only two wavefronts can be active in the CU at
|
||||
the same time. With the instruction latency of four cycles per SIMD instruction,
|
||||
the occupancy should be as high as possible such that the compute unit can
|
||||
improve execution efficiency by scheduling instructions from multiple
|
||||
wavefronts.
|
||||
|
||||
:::{table} Peak-performance capabilities of MI100 for different data types.
|
||||
:name: mi100-perf
|
||||
| Computation and Data Type | FLOPS/CLOCK/CU | Peak TFLOPS |
|
||||
| :------------------------ | :------------: | ----------: |
|
||||
| Vector FP64 | 64 | 11.5 |
|
||||
| Matrix FP32 | 256 | 46.1 |
|
||||
| Vector FP32 | 128 | 23.1 |
|
||||
| Matrix FP16 | 1024 | 184.6 |
|
||||
| Matrix BF16 | 512 | 92.3 |
|
||||
|
||||
:::
|
||||
@@ -1,134 +0,0 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Learn about the AMD Instinct MI250 series architecture."
|
||||
"keywords": "Instinct, MI250, microarchitecture, AMD, ROCm"
|
||||
---
|
||||
|
||||
# AMD Instinct™ MI250 microarchitecture
|
||||
|
||||
The microarchitecture of the AMD Instinct MI250 accelerators is based on the
|
||||
AMD CDNA 2 architecture that targets compute applications such as HPC,
|
||||
artificial intelligence (AI), and machine learning (ML) and that run on
|
||||
everything from individual servers to the world’s largest exascale
|
||||
supercomputers. The overall system architecture is designed for extreme
|
||||
scalability and compute performance.
|
||||
|
||||
The following image shows the components of a single Graphics Compute Die (GCD) of the CDNA 2 architecture. On the top and the bottom are AMD Infinity Fabric™
|
||||
interfaces and their physical links that are used to connect the GPU die to the
|
||||
other system-level components of the node (see also Section 2.2). Both
|
||||
interfaces can drive four AMD Infinity Fabric links. One of the AMD Infinity
|
||||
Fabric links of the controller at the bottom can be configured as a PCIe link.
|
||||
Each of the AMD Infinity Fabric links between GPUs can run at up to 25 GT/sec,
|
||||
which correlates to a peak transfer bandwidth of 50 GB/sec for a 16-wide link (
|
||||
two bytes per transaction). Section 2.2 has more details on the number of AMD
|
||||
Infinity Fabric links and the resulting transfer rates between the system-level
|
||||
components.
|
||||
|
||||
To the left and the right are memory controllers that attach the High Bandwidth
|
||||
Memory (HBM) modules to the GCD. AMD Instinct MI250 GPUs use HBM2e, which offers
|
||||
a peak memory bandwidth of 1.6 TB/sec per GCD.
|
||||
|
||||
The execution units of the GPU are depicted in the following image as Compute
|
||||
Units (CU). The MI250 GCD has 104 active CUs. Each compute unit is further
|
||||
subdivided into four SIMD units that process SIMD instructions of 16 data
|
||||
elements per instruction (for the FP64 data type). This enables the CU to
|
||||
process 64 work items (a so-called “wavefront”) at a peak clock frequency of 1.7
|
||||
GHz. Therefore, the theoretical maximum FP64 peak performance per GCD is 22.6
|
||||
TFLOPS for vector instructions. This equates to 45.3 TFLOPS for vector instructions for both GCDs together. The MI250 compute units also provide specialized
|
||||
execution units (also called matrix cores), which are geared toward executing
|
||||
matrix operations like matrix-matrix multiplications. For FP64, the peak
|
||||
performance of these units amounts to 90.5 TFLOPS.
|
||||
|
||||

|
||||
|
||||
```{list-table} Peak-performance capabilities of the MI250 OAM for different data types.
|
||||
:header-rows: 1
|
||||
:name: mi250-perf-table
|
||||
|
||||
*
|
||||
- Computation and Data Type
|
||||
- FLOPS/CLOCK/CU
|
||||
- Peak TFLOPS
|
||||
*
|
||||
- Matrix FP64
|
||||
- 256
|
||||
- 90.5
|
||||
*
|
||||
- Vector FP64
|
||||
- 128
|
||||
- 45.3
|
||||
*
|
||||
- Matrix FP32
|
||||
- 256
|
||||
- 90.5
|
||||
*
|
||||
- Packed FP32
|
||||
- 256
|
||||
- 90.5
|
||||
*
|
||||
- Vector FP32
|
||||
- 128
|
||||
- 45.3
|
||||
*
|
||||
- Matrix FP16
|
||||
- 1024
|
||||
- 362.1
|
||||
*
|
||||
- Matrix BF16
|
||||
- 1024
|
||||
- 362.1
|
||||
*
|
||||
- Matrix INT8
|
||||
- 1024
|
||||
- 362.1
|
||||
```
|
||||
|
||||
The above table summarizes the aggregated peak performance of the AMD
|
||||
Instinct MI250 OCP Open Accelerator Modules (OAM, OCP is short for Open Compute
|
||||
Platform) and its two GCDs for different data types and execution units. The
|
||||
middle column lists the peak performance (number of data elements processed in a
|
||||
single instruction) of a single compute unit if a SIMD (or matrix) instruction
|
||||
is being retired in each clock cycle. The third column lists the theoretical
|
||||
peak performance of the OAM module. The theoretical aggregated peak memory
|
||||
bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD).
|
||||
|
||||

|
||||
|
||||
The following image shows the block diagram of an OAM package that consists
|
||||
of two GCDs, each of which constitutes one GPU device in the system. The two
|
||||
GCDs in the package are connected via four AMD Infinity Fabric links running at
|
||||
a theoretical peak rate of 25 GT/sec, giving 200 GB/sec peak transfer bandwidth
|
||||
between the two GCDs of an OAM, or a bidirectional peak transfer bandwidth of
|
||||
400 GB/sec for the same.
|
||||
|
||||
## Node-level architecture
|
||||
|
||||
The following image shows the node-level architecture of a system that is
|
||||
based on the AMD Instinct MI250 accelerator. The MI250 OAMs attach to the host
|
||||
system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe
|
||||
x16 link to the host part of the system. Depending on the server platform, the
|
||||
GCD can attach to the AMD EPYC processor directly or via an optional PCIe switch
|
||||
. Note that some platforms may offer an x8 interface to the GCDs, which reduces
|
||||
the available host-to-GPU bandwidth.
|
||||
|
||||

|
||||
|
||||
The preceding image shows the node-level architecture of a system with AMD
|
||||
EPYC processors in a dual-socket configuration and four AMD Instinct MI250
|
||||
accelerators. The MI250 OAMs attach to the host processors system via PCIe Gen 4
|
||||
x16 links (yellow lines). Depending on the system design, a PCIe switch may
|
||||
exist to make more PCIe lanes available for additional components like network
|
||||
interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to
|
||||
the host part of the system or to the PCIe switch. Please note, some platforms
|
||||
may offer an x8 interface to the GCDs, which will reduce the available
|
||||
host-to-GPU bandwidth.
|
||||
|
||||
Between the OAMs and their respective GCDs, a peer-to-peer (P2P) network allows
|
||||
for direct data exchange between the GPU dies via AMD Infinity Fabric links (
|
||||
black, green, and red lines). Each of these 16-wide links connects to one of the
|
||||
two GPU dies in the MI250 OAM and operates at 25 GT/sec, which corresponds to a
|
||||
theoretical peak transfer rate of 50 GB/sec per link (or 100 GB/sec
|
||||
bidirectional peak transfer bandwidth). The GCD pairs 2 and 6 as well as GCDs 0
|
||||
and 4 connect via two XGMI links, which is indicated by the thicker red line in
|
||||
the preceding image.
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user