Compare commits
163 Commits
docs/6.1.5
...
docs/6.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6815b69793 | ||
|
|
2f192a4768 | ||
|
|
0f80a3b068 | ||
|
|
c5bf17e358 | ||
|
|
5b7315f5b1 | ||
|
|
4302900e68 | ||
|
|
fe5fe6c4d0 | ||
|
|
aa5e1b98a9 | ||
|
|
8789d63887 | ||
|
|
0f61f2e8e0 | ||
|
|
f9586abf5a | ||
|
|
14f9c58e3a | ||
|
|
41383b3c77 | ||
|
|
2e7231fad2 | ||
|
|
ae158f00f6 | ||
|
|
275448a26e | ||
|
|
f6bf909ad1 | ||
|
|
6ae116f03c | ||
|
|
0c19e2da62 | ||
|
|
73d9fb01ab | ||
|
|
42085c14e6 | ||
|
|
e4774085da | ||
|
|
b6f9b58984 | ||
|
|
3b94287182 | ||
|
|
6366347c59 | ||
|
|
fc17a21260 | ||
|
|
e625ee55f6 | ||
|
|
78a93b63b2 | ||
|
|
b7be98cf43 | ||
|
|
4c0199abd9 | ||
|
|
d3ce15347f | ||
|
|
e07aef730e | ||
|
|
f4f58d4d83 | ||
|
|
d8bed576ce | ||
|
|
a09fabfc84 | ||
|
|
efb05885c3 | ||
|
|
0a9966cb3e | ||
|
|
707ef1da12 | ||
|
|
b5092ba9f1 | ||
|
|
e2a04586ad | ||
|
|
2ede39e314 | ||
|
|
b98bc71cb8 | ||
|
|
77bc63e5e2 | ||
|
|
a36ef20ada | ||
|
|
e602d2c52e | ||
|
|
4ce97de16e | ||
|
|
f7e6bd50ed | ||
|
|
f116ddd5c9 | ||
|
|
4c0650c44e | ||
|
|
73bc7d6167 | ||
|
|
84d44a7c3c | ||
|
|
f622b8796b | ||
|
|
5895f04f85 | ||
|
|
c36206ca46 | ||
|
|
e7939656e7 | ||
|
|
df8dce7f77 | ||
|
|
6b2987d9b5 | ||
|
|
4c65fa36da | ||
|
|
18a14f8187 | ||
|
|
c6a73fb255 | ||
|
|
b9183c1b7d | ||
|
|
6c1ec0e9a4 | ||
|
|
f8fcdbeac6 | ||
|
|
0411a5a806 | ||
|
|
095535cc93 | ||
|
|
2db956f3ed | ||
|
|
b46f82eab2 | ||
|
|
ad4e7f29ea | ||
|
|
2a402e010c | ||
|
|
27f5d9ad7d | ||
|
|
a2f69ec77a | ||
|
|
a1b9eac4af | ||
|
|
27d94c174a | ||
|
|
52ea80d262 | ||
|
|
e0555d7f81 | ||
|
|
00d6116f0a | ||
|
|
358585d00f | ||
|
|
499cff0da0 | ||
|
|
9c874ce984 | ||
|
|
f47afb7c66 | ||
|
|
a9d53e3d4e | ||
|
|
5ab19055ee | ||
|
|
4e9e38fcd0 | ||
|
|
2308f43653 | ||
|
|
2d61a92120 | ||
|
|
bef024a500 | ||
|
|
a6f550d716 | ||
|
|
71a306d62a | ||
|
|
c835c16d68 | ||
|
|
80ff647086 | ||
|
|
af8f67a155 | ||
|
|
38e86b97d4 | ||
|
|
c32d002239 | ||
|
|
a51b11dd36 | ||
|
|
b6c72fa7d2 | ||
|
|
16dc949166 | ||
|
|
f12354349a | ||
|
|
72cf5aa092 | ||
|
|
bf36a27e0b | ||
|
|
fe174ebf24 | ||
|
|
e9310c1794 | ||
|
|
060ab43b4d | ||
|
|
a24a38686f | ||
|
|
ebb584c0dd | ||
|
|
33ce708926 | ||
|
|
10f8efa7e8 | ||
|
|
63d3dfd344 | ||
|
|
f087dafca2 | ||
|
|
b6a305f924 | ||
|
|
717ec0df34 | ||
|
|
531796f523 | ||
|
|
9de6c61100 | ||
|
|
e5e8114c92 | ||
|
|
647634a976 | ||
|
|
bf869e9d70 | ||
|
|
5909efb01c | ||
|
|
a084244ac0 | ||
|
|
dbea7719f9 | ||
|
|
bdcb82372b | ||
|
|
222865c8be | ||
|
|
a4d6a8259c | ||
|
|
59df2fc110 | ||
|
|
f56aca0d31 | ||
|
|
62dd3820a2 | ||
|
|
4643ee1d74 | ||
|
|
8cb0813fac | ||
|
|
a069f2438f | ||
|
|
bbd5642d0d | ||
|
|
0762966fd1 | ||
|
|
8dd8596057 | ||
|
|
c71969b79a | ||
|
|
2c5aabec54 | ||
|
|
7edc2a677b | ||
|
|
7b883f3af4 | ||
|
|
ac7f0fc9d9 | ||
|
|
235191535c | ||
|
|
25c5163666 | ||
|
|
b08cef45cd | ||
|
|
00a786fb71 | ||
|
|
d7f514e447 | ||
|
|
7dcf79f95e | ||
|
|
f3dcb40950 | ||
|
|
e800ec1847 | ||
|
|
867d8e294e | ||
|
|
9285fbe704 | ||
|
|
e641b1b25f | ||
|
|
091fa3ef8e | ||
|
|
2a34212ddd | ||
|
|
f3cc6be6dd | ||
|
|
a3bb1d5e4f | ||
|
|
e24a0c8272 | ||
|
|
becbe5e3ae | ||
|
|
a36295c880 | ||
|
|
7f715fa474 | ||
|
|
cb6b61f3b7 | ||
|
|
9d6e415dde | ||
|
|
0e52d354a3 | ||
|
|
b601290c28 | ||
|
|
be4ed8cd84 | ||
|
|
3c42dc49ab | ||
|
|
8311130829 | ||
|
|
d991252467 | ||
|
|
e01f13e803 |
@@ -54,6 +54,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -69,17 +75,17 @@ jobs:
|
||||
dependencySource: fixed
|
||||
fixedComponentName: half
|
||||
fixedPipelineIdentifier: $(half560-pipeline-id)
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
skipLibraryLinking: true
|
||||
skipLlvmSymlink: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -87,7 +93,7 @@ jobs:
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=gfx942
|
||||
-DGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||
-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF
|
||||
@@ -95,3 +101,5 @@ jobs:
|
||||
-GNinja
|
||||
# REFERENCE: https://github.com/ROCm/composable_kernel/issues/782
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -10,6 +10,9 @@ parameters:
|
||||
default:
|
||||
- libnuma-dev
|
||||
- mesa-common-dev
|
||||
- ocl-icd-libopencl1
|
||||
- ocl-icd-opencl-dev
|
||||
- opencl-headers
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -55,17 +58,17 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
@@ -110,17 +113,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
|
||||
displayName: 'Artifact listing'
|
||||
|
||||
@@ -33,17 +33,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -25,6 +25,7 @@ parameters:
|
||||
- rocBLAS
|
||||
- hipBLAS
|
||||
- hipBLASLt
|
||||
- hipBLAS-common
|
||||
- half
|
||||
- composable_kernel
|
||||
- rocm-cmake
|
||||
@@ -43,6 +44,12 @@ jobs:
|
||||
pool: ${{ variables.LARGE_DISK_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -63,17 +70,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -87,3 +92,5 @@ jobs:
|
||||
-DBUILD_TESTING=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -68,6 +68,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -77,17 +83,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -97,3 +101,5 @@ jobs:
|
||||
-DROCM_DEP_ROCMCORE=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -36,17 +36,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -35,17 +35,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -19,6 +19,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- hipRAND
|
||||
- llvm-project
|
||||
- rocBLAS
|
||||
- rocm-cmake
|
||||
@@ -26,6 +27,7 @@ parameters:
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- rocRAND
|
||||
- ROCT-Thunk-Interface
|
||||
|
||||
jobs:
|
||||
@@ -43,6 +45,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -51,24 +59,16 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
# Set link to redirect llvm folder
|
||||
- task: Bash@3
|
||||
displayName: create symlink
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -78,3 +78,5 @@ jobs:
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$(Build.BinariesDirectory)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -35,17 +35,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -58,17 +58,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: llvm-project_repo
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -25,13 +25,19 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- job: composable_kernel
|
||||
timeoutInMinutes: 100
|
||||
timeoutInMinutes: 180
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.ULTRA_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -40,25 +46,26 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_HIP_FLAGS="-Wno-missing-include-dirs"
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=gfx942
|
||||
-DGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -24,9 +24,8 @@ jobs:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml
|
||||
parameters:
|
||||
useDefaultBranch: false
|
||||
componentName: HIP
|
||||
branchName: develop
|
||||
pipelineId: $(hip-pipeline-id)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Agent.BuildDirectory)/rocm
|
||||
|
||||
@@ -37,17 +37,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
62
.azuredevops/components/hipBLAS-common.yml
Normal file
@@ -0,0 +1,62 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- git
|
||||
- wget
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- clr
|
||||
- rocminfo
|
||||
|
||||
jobs:
|
||||
- job: hipBLAS_common
|
||||
variables:
|
||||
- group: common
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
@@ -20,16 +20,17 @@ parameters:
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- aomp
|
||||
- clr
|
||||
- hipBLAS-common
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- ROCR-Runtime
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- rocBLAS
|
||||
- rocSPARSE
|
||||
- rocSOLVER
|
||||
- aomp
|
||||
|
||||
jobs:
|
||||
- job: hipBLAS
|
||||
@@ -39,6 +40,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -59,17 +66,15 @@ jobs:
|
||||
targetType: inline
|
||||
script: sudo apt install --yes ./aocl-linux-aocc-4.1.0_1_amd64.deb
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -84,3 +89,5 @@ jobs:
|
||||
-DCPACK_SET_DESTDIR=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -23,7 +23,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
@@ -31,7 +31,7 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- job: hipBLASLt
|
||||
timeoutInMinutes: 100
|
||||
timeoutInMinutes: 300
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -50,6 +50,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -60,17 +66,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
displayName: ROCm symbolic link
|
||||
@@ -89,23 +93,13 @@ jobs:
|
||||
- script: sudo make install
|
||||
displayName: Install hipBLASLt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
# Set link to redirect llvm folder
|
||||
- task: Bash@3
|
||||
displayName: Symlink to rocm/lib/llvm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- script: sudo chmod 777 /mnt
|
||||
displayName: 'Set permissions for /mnt'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
cmakeBuildDir: /mnt/build
|
||||
cmakeSourceDir: $(Pipeline.Workspace)/s
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DTensile_LOGIC=
|
||||
-DTensile_CPU_THREADS=
|
||||
-DTensile_CODE_OBJECT_VERSION=default
|
||||
@@ -113,3 +107,5 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -30,6 +30,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -38,17 +44,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -57,6 +61,8 @@ jobs:
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_TEST=ON
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -38,6 +38,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -46,17 +52,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -66,7 +70,7 @@ jobs:
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DUSE_HIP_CLANG=ON
|
||||
-DHIP_COMPILER=clang
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
@@ -74,3 +78,5 @@ jobs:
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -32,6 +32,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -40,17 +46,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -61,6 +65,8 @@ jobs:
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -37,6 +37,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -45,17 +51,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
# build external gtest and lapack
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
@@ -78,3 +82,5 @@ jobs:
|
||||
-DUSE_CUDA=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -35,6 +35,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -43,17 +49,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -67,6 +71,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: hipSPARSE
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
publish: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
@@ -75,3 +80,4 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: testMatrices
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -47,6 +47,10 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -57,17 +61,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -75,7 +77,7 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DTensile_LOGIC=
|
||||
-DTensile_CPU_THREADS=
|
||||
-DTensile_CODE_OBJECT_VERSION=default
|
||||
@@ -84,3 +86,5 @@ jobs:
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -30,6 +30,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -38,17 +44,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -58,6 +62,8 @@ jobs:
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DHIPTENSOR_BUILD_TESTS=ON
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
multithreadFlag: -- -j32
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -38,17 +38,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -41,6 +41,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -49,17 +55,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- script: chmod +x $(Agent.BuildDirectory)/rocm/bin/hipify-perl
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
@@ -72,6 +76,8 @@ jobs:
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_TESTS=ON
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/share/rocm/cmake/
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -45,17 +45,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
# Build grpc
|
||||
- task: Bash@3
|
||||
|
||||
@@ -54,6 +54,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: 'Register libjpeg-turbo packages'
|
||||
@@ -115,17 +121,15 @@ jobs:
|
||||
targetType: inline
|
||||
script: sudo cmake --build . --target install
|
||||
workingDirectory: '$(Build.SourcesDirectory)/rapidjson/build'
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -136,3 +140,5 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -40,6 +40,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -48,17 +54,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -67,9 +71,11 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/share/rocm/cmake/
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_CLIENTS_BENCHMARKS=OFF
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -54,6 +54,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -63,43 +69,38 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Download AOCL'
|
||||
inputs:
|
||||
targetType: inline
|
||||
- task: Bash@3
|
||||
displayName: 'Download AOCL'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
script: wget -nv https://download.amd.com/developer/eula/aocl/aocl-4-2/aocl-linux-gcc-4.2.0_1_amd64.deb
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- task: Bash@3
|
||||
displayName: 'Install AOCL'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo apt install --yes ./aocl-linux-gcc-4.2.0_1_amd64.deb
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Download AOCL'
|
||||
inputs:
|
||||
targetType: inline
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
script: wget -nv https://download.amd.com/developer/eula/aocl/aocl-4-1/aocl-linux-aocc-4.1.0_1_amd64.deb
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- task: Bash@3
|
||||
displayName: 'Install AOCL'
|
||||
inputs:
|
||||
targetType: inline
|
||||
- task: Bash@3
|
||||
displayName: 'Install AOCL'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
script: sudo apt install --yes ./aocl-linux-gcc-4.2.0_1_amd64.deb
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
script: sudo apt install --yes ./aocl-linux-aocc-4.1.0_1_amd64.deb
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- script: echo $PATH
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -108,7 +109,7 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DTensile_CODE_OBJECT_VERSION=default
|
||||
-DTensile_LOGIC=asm_full
|
||||
-DTensile_SEPARATE_ARCHITECTURES=ON
|
||||
@@ -120,3 +121,5 @@ jobs:
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -29,6 +29,7 @@ parameters:
|
||||
- clr
|
||||
- rocminfo
|
||||
- rocm-core
|
||||
- rocprofiler-register
|
||||
|
||||
jobs:
|
||||
- job: rocDecode
|
||||
@@ -59,22 +60,19 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
@@ -11,6 +11,7 @@ parameters:
|
||||
- cmake
|
||||
- ninja-build
|
||||
- libboost-program-options-dev
|
||||
- libdrm-dev
|
||||
- libgtest-dev
|
||||
- libfftw3-dev
|
||||
- python3-pip
|
||||
@@ -37,6 +38,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -45,17 +52,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -64,7 +69,7 @@ jobs:
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DUSE_HIP_CLANG=ON
|
||||
-DHIP_COMPILER=clang
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
@@ -72,3 +77,5 @@ jobs:
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -34,17 +34,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -30,6 +30,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -38,20 +44,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# ${{ }} are resolved during compile-time
|
||||
# so this next step is skipped completely until
|
||||
# we define explicit aptPackages needed to install
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -59,7 +60,9 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_BENCHMARK=ON
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DBUILD_TEST=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -33,6 +33,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -41,17 +47,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -59,6 +63,8 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_TEST=ON
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -38,6 +38,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -52,17 +58,15 @@ jobs:
|
||||
targetType: inline
|
||||
script: git clone --depth 1 --branch v3.9.1 https://github.com/Reference-LAPACK/lapack
|
||||
workingDirectory: '$(Build.SourcesDirectory)'
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -82,9 +86,11 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_CLIENTS_BENCHMARKS=OFF
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -40,6 +40,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -48,17 +54,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -68,7 +72,7 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_CLIENTS_BENCHMARKS=OFF
|
||||
@@ -77,6 +81,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: rocSPARSE
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
publish: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
@@ -85,3 +90,4 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: testMatrices
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -33,6 +33,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -41,17 +47,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -60,7 +64,8 @@ jobs:
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DBUILD_TEST=ON
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -37,6 +37,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -45,17 +51,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -65,7 +69,9 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DROCWMMA_BUILD_TESTS=ON
|
||||
-DROCWMMA_BUILD_SAMPLES=OFF
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
# gfx1030 not supported in documentation
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -39,6 +39,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
@@ -47,17 +53,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -66,6 +70,8 @@ jobs:
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_ROOT=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_HIP_ARCHITECTURES=gfx942
|
||||
-DCMAKE_HIP_ARCHITECTURES=$(JOB_GPU_TARGET)
|
||||
-DCMAKE_EXE_LINKER_FLAGS=-fgpu-rdc
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -48,17 +48,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -25,17 +25,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -54,6 +54,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -86,17 +92,15 @@ jobs:
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm
|
||||
cp -R hsa-amd-aqlprofile/opt/rocm-6.2.0-14213/* $(Agent.BuildDirectory)/rocm
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -105,5 +109,7 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DENABLE_LDCONFIG=OFF
|
||||
-DUSE_PROF_API=1
|
||||
-DGPU_TARGETS=gfx942
|
||||
-DGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -40,17 +40,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
|
||||
@@ -37,6 +37,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -46,17 +52,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -65,6 +69,8 @@ jobs:
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DGPU_TARGETS=gfx942
|
||||
-DGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
@@ -31,6 +31,12 @@ jobs:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
@@ -39,17 +45,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# CI case: download latest default branch build
|
||||
- ${{ if eq(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
- ${{ if ne(parameters.checkoutRef, '') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -60,6 +64,8 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHALF_INCLUDE_DIRS=$(Agent.BuildDirectory)/rocm/include
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DAMDGPU_TARGETS=gfx942
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
260
.azuredevops/nightly/pytorch.yml
Normal file
@@ -0,0 +1,260 @@
|
||||
parameters:
|
||||
# ubuntu near-equivalent list of yum installs in https://github.com/ROCm/ROCm-docker/blob/master/dev/Dockerfile-centos-7-complete
|
||||
# plus additional packages found through iterative testing of pipeline
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- build-essential
|
||||
- git
|
||||
- ninja-build
|
||||
- openjdk-8-jdk
|
||||
- ca-certificates
|
||||
- bc
|
||||
- bridge-utils
|
||||
- cmake
|
||||
- devscripts
|
||||
- dkms
|
||||
- doxygen
|
||||
- libdpkg-dev
|
||||
- libdpkg-perl
|
||||
- libelf-dev
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- wget
|
||||
- ncurses-base
|
||||
- libncurses-dev
|
||||
- numactl
|
||||
- libnuma-dev
|
||||
- libssh-dev
|
||||
- libunwind-dev
|
||||
- llvm-dev
|
||||
- libpth-dev
|
||||
- qemu-kvm
|
||||
- re2c
|
||||
- subversion
|
||||
- fakeroot
|
||||
- autoconf
|
||||
- libgomp1
|
||||
- libtinfo-dev
|
||||
- libcholmod3
|
||||
- libsuitesparseconfig5
|
||||
- libstdc++-12-dev
|
||||
- python-is-python3
|
||||
- gfortran
|
||||
- libgfortran5
|
||||
- liblapack3
|
||||
- libblas3
|
||||
- libquadmath0
|
||||
- libmetis5
|
||||
- libamd2
|
||||
- libcamd2
|
||||
- libcolamd2
|
||||
- libccolamd2
|
||||
- libdrm-amdgpu1
|
||||
- ccache
|
||||
- zip
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- astunparse
|
||||
- expecttest!=0.2.0
|
||||
- hypothesis
|
||||
- numpy
|
||||
- psutil
|
||||
- pyyaml
|
||||
- requests
|
||||
- setuptools
|
||||
- types-dataclasses
|
||||
- typing-extensions>=4.8.0
|
||||
- sympy>=1.13.0
|
||||
- filelock
|
||||
- networkx
|
||||
- jinja2
|
||||
- fsspec
|
||||
- lintrunner
|
||||
- ninja
|
||||
- packaging
|
||||
- optree>=0.12.0
|
||||
# list from https://github.com/pytorch/builder/blob/main/manywheel/build_rocm.sh
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocminfo
|
||||
- MIOpen
|
||||
- clr
|
||||
- hipBLAS
|
||||
- hipFFT
|
||||
- hipRAND
|
||||
- hipSOLVER
|
||||
- hipSPARSE
|
||||
- ROCR-Runtime
|
||||
- llvm-project
|
||||
- rccl
|
||||
- rocBLAS
|
||||
- rocFFT
|
||||
- rocm_smi_lib
|
||||
- rocRAND
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
- roctracer
|
||||
- hipBLASLt
|
||||
- rocprofiler-register
|
||||
- rocm-core
|
||||
- rocPRIM
|
||||
# below are additional dependencies not called out by build script, but throw errors during cmake
|
||||
- hipCUB
|
||||
- rocThrust
|
||||
- hipBLAS-common
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
schedules:
|
||||
- cron: '30 7 * * *'
|
||||
displayName: nightly pytorch
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
always: true
|
||||
|
||||
jobs:
|
||||
- job: pytorch
|
||||
timeoutInMinutes: 120
|
||||
strategy:
|
||||
matrix:
|
||||
amd-staging-gfx942:
|
||||
ROCM_BRANCH: amd-staging
|
||||
JOB_GPU_TARGET: gfx942
|
||||
amd-staging-gfx90a:
|
||||
ROCM_BRANCH: amd-staging
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
# various flags/parameters expected by bash scripts in pytorch builder repo
|
||||
- name: ROCM_VERSION
|
||||
value: 6.3.0
|
||||
- name: ROCM_PATH
|
||||
value: /opt/rocm
|
||||
- name: DESIRED_CUDA
|
||||
value: 6.3.0
|
||||
- name: MKLROOT
|
||||
value: /opt/intel
|
||||
- name: AOTRITON_INSTALLED_PREFIX
|
||||
value: /opt/rocm/aotriton
|
||||
- name: DESIRED_PYTHON
|
||||
value: 3.10
|
||||
- name: PYTORCH_ROOT
|
||||
value: $(Build.SourcesDirectory)/pytorch
|
||||
- name: CMAKE_ARGS
|
||||
value: -GNinja
|
||||
- name: DESIRED_DEVTOOLSET
|
||||
value: cxx11-abi
|
||||
pool: ${{ variables.ULTRA_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
# copy environment setup from https://github.com/pytorch/builder/blob/main/manywheel/Dockerfile
|
||||
# but instead of centos, use ubuntu environment
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# wheel install location different on azure agent compared to where wheel is assumed to be installed on upstream script
|
||||
- task: Bash@3
|
||||
displayName: wheel install path symlink
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir -p /opt/python/cp310-cp310/lib/python3.10
|
||||
sudo ln -s /usr/local/lib/python3.10/dist-packages /opt/python/cp310-cp310/lib/python3.10/site-packages
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
- task: Bash@3
|
||||
displayName: ROCm symbolic link
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- checkout: self
|
||||
- task: Bash@3
|
||||
displayName: git clone pytorch builder
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: git clone upstream pytorch
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/pytorch.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Patch out forced GPU testing block in pytorch build script
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git apply $(Build.SourcesDirectory)/.azuredevops/patches/pytorch/0001-ROCm-CI-patches.patch
|
||||
workingDirectory: $(Build.SourcesDirectory)/builder
|
||||
- task: Bash@3
|
||||
displayName: Install patchelf
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_patchelf.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install mkl dependency for magma
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_mkl.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install rocm drm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash pytorch/.ci/docker/common/install_rocm_drm.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install rocm magma
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo PYTORCH_ROCM_ARCH=$(JOB_GPU_TARGET) MKLROOT=$(MKLROOT) bash pytorch/.ci/docker/common/install_rocm_magma.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install AOTriton Shared Library
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo bash ./common/install_aotriton.sh /opt/rocm
|
||||
workingDirectory: $(Build.SourcesDirectory)/pytorch/.ci/docker
|
||||
- task: Bash@3
|
||||
displayName: Run ROCm Build Script
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
sudo
|
||||
DESIRED_CUDA=$(DESIRED_CUDA)
|
||||
PYTORCH_ROCM_ARCH=$(JOB_GPU_TARGET)
|
||||
GPU_TARGET=$(JOB_GPU_TARGET)
|
||||
DESIRED_PYTHON=$(DESIRED_PYTHON)
|
||||
PYTORCH_ROOT=$(PYTORCH_ROOT)
|
||||
CMAKE_ARGS=$(CMAKE_ARGS)
|
||||
AOTRITON_INSTALLED_PREFIX=$(AOTRITON_INSTALLED_PREFIX)
|
||||
DESIRED_DEVTOOLSET=$(DESIRED_DEVTOOLSET)
|
||||
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
PYTORCH_BUILD_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)
|
||||
PYTORCH_BUILD_NUMBER=$(date -u +%Y%m%d)
|
||||
bash ./manywheel/build_rocm.sh
|
||||
workingDirectory: $(Build.SourcesDirectory)/builder
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'ROCm pytorch wheel file Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetPath: /remote/wheelhouserocm$(ROCM_VERSION)
|
||||
@@ -11,6 +11,7 @@ parameters:
|
||||
- half
|
||||
- HIP
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- hipCUB
|
||||
- hipFFT
|
||||
@@ -72,6 +73,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Cleanup checkout space'
|
||||
@@ -84,28 +91,26 @@ jobs:
|
||||
SourceFolder: '$(Build.ArtifactStagingDirectory)'
|
||||
Contents: '/**/*'
|
||||
RemoveDotFiles: true
|
||||
- script: sudo chmod 777 /mnt
|
||||
displayName: 'Set permissions for /mnt'
|
||||
- script: df -h
|
||||
displayName: System disk space before ROCm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
extractToMnt: true
|
||||
skipLibraryLinking: true
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
- script: df -h
|
||||
displayName: System disk space after ROCm
|
||||
- script: du -sh /mnt/rocm
|
||||
- script: du -sh $(Agent.BuildDirectory)/rocm
|
||||
displayName: Uncompressed ROCm size
|
||||
- task: ArchiveFiles@2
|
||||
displayName: Compress rocm-nightly
|
||||
inputs:
|
||||
rootFolderOrFile: /mnt/rocm
|
||||
rootFolderOrFile: $(Agent.BuildDirectory)/rocm
|
||||
includeRootFolder: false
|
||||
archiveType: tar
|
||||
tarCompression: gz
|
||||
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204.tar.gz
|
||||
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204_$(JOB_GPU_TARGET).tar.gz
|
||||
- script: du -sh $(Build.ArtifactStagingDirectory)
|
||||
displayName: Compressed ROCm size
|
||||
- task: PublishPipelineArtifact@1
|
||||
|
||||
40
.azuredevops/patches/pytorch/0001-ROCm-CI-patches.patch
Normal file
@@ -0,0 +1,40 @@
|
||||
From b2d3c88f7a8b179e814e72f76f27e25c82659200 Mon Sep 17 00:00:00 2001
|
||||
From: Joseph Macaranas <Joseph.Macaranas@amd.com>
|
||||
Date: Tue, 30 Jul 2024 05:43:06 -0400
|
||||
Subject: [PATCH] ROCm CI patches
|
||||
|
||||
---
|
||||
manywheel/build_common.sh | 9 ---------
|
||||
1 file changed, 9 deletions(-)
|
||||
|
||||
diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh
|
||||
index 08ca924..52c468f 100644
|
||||
--- a/manywheel/build_common.sh
|
||||
+++ b/manywheel/build_common.sh
|
||||
@@ -475,11 +475,9 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
fi
|
||||
|
||||
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
||||
-
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
fi
|
||||
-
|
||||
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
|
||||
# Print info on the libraries installed in this wheel
|
||||
@@ -491,11 +489,4 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
ldd "$installed_lib" || true
|
||||
done
|
||||
|
||||
- # Run the tests
|
||||
- echo "$(date) :: Running tests"
|
||||
- pushd "$PYTORCH_ROOT"
|
||||
- LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
|
||||
- "${SOURCE_DIR}/../run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
|
||||
- popd
|
||||
- echo "$(date) :: Finished tests"
|
||||
fi
|
||||
--
|
||||
2.44.0.windows.1
|
||||
|
||||
29
.azuredevops/tag-builds/hipBLAS-common.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: develop
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
- repository: release_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/hipBLAS-common
|
||||
ref: ${{ parameters.checkoutRef }}
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/hipBLAS-common.yml
|
||||
parameters:
|
||||
checkoutRepo: release_repo
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -9,9 +9,15 @@ parameters:
|
||||
- name: useDefaultBranch
|
||||
type: boolean
|
||||
default: true
|
||||
- name: latestFromBranch
|
||||
type: boolean
|
||||
default: true
|
||||
- name: extractToMnt
|
||||
type: boolean
|
||||
default: false
|
||||
- name: fileFilter
|
||||
type: string
|
||||
default: ''
|
||||
- name: defaultBranchList
|
||||
type: object
|
||||
default:
|
||||
@@ -19,12 +25,13 @@ parameters:
|
||||
amdsmi: develop
|
||||
aomp-extras: aomp-dev
|
||||
aomp: aomp-dev
|
||||
clr: develop
|
||||
clr: amd-staging
|
||||
composable_kernel: develop
|
||||
half: master
|
||||
HIP: develop
|
||||
half: rocm
|
||||
HIP: amd-staging
|
||||
hipBLAS: develop
|
||||
hipBLASLt: develop
|
||||
hipBLAS-common: develop
|
||||
hipCUB: develop
|
||||
hipFFT: develop
|
||||
hipfort: develop
|
||||
@@ -42,10 +49,10 @@ parameters:
|
||||
rocAL: develop
|
||||
rocALUTION: develop
|
||||
rocBLAS: develop
|
||||
ROCdbgapi : amd-master
|
||||
ROCdbgapi : amd-staging
|
||||
rocDecode: develop
|
||||
rocFFT: develop
|
||||
rocgdb: amd-staging
|
||||
ROCgdb: amd-staging
|
||||
rocm-cmake: develop
|
||||
rocm-core: master
|
||||
rocm-examples: develop
|
||||
@@ -56,7 +63,7 @@ parameters:
|
||||
rocm_smi_lib: develop
|
||||
rocPRIM: develop
|
||||
rocprofiler-register: amd-mainline
|
||||
rocprofiler: amd-master
|
||||
rocprofiler: amd-staging
|
||||
ROCR-Runtime: master
|
||||
rocRAND: develop
|
||||
rocr_debug_agent: amd-staging
|
||||
@@ -64,13 +71,9 @@ parameters:
|
||||
rocSPARSE: develop
|
||||
ROCT-Thunk-Interface: master
|
||||
rocThrust: develop
|
||||
roctracer: amd-master
|
||||
roctracer: amd-staging
|
||||
rocWMMA: develop
|
||||
rpp: master
|
||||
- name: componentsFailureOkay
|
||||
type: object
|
||||
default:
|
||||
- rocm-cmake
|
||||
# BELOW REQUIRED IF useDefaultBranch false
|
||||
- name: branchName
|
||||
type: string
|
||||
@@ -84,11 +87,15 @@ steps:
|
||||
project: ROCm-CI
|
||||
definition: ${{ parameters.pipelineId }}
|
||||
specificBuildWithTriggering: true
|
||||
itemPattern: '**/*${{ parameters.fileFilter }}*'
|
||||
${{ if eq(parameters.latestFromBranch, true) }}:
|
||||
${{ if notIn(parameters.componentName, 'aomp') }}: # remove this once these pipelines are functional + up-to-date
|
||||
buildVersionToDownload: latestFromBranch # default is 'latest'
|
||||
${{ if eq(parameters.useDefaultBranch, true) }}:
|
||||
branchName: ${{ parameters.defaultBranchList[parameters.componentName] }}
|
||||
branchName: refs/heads/${{ parameters.defaultBranchList[parameters.componentName] }}
|
||||
${{ else }}:
|
||||
branchName: ${{ parameters.branchName }}
|
||||
${{ if in(parameters.componentName, parameters.componentsFailureOkay) }}:
|
||||
${{ if in(parameters.componentName, 'rocm-cmake') }}:
|
||||
allowPartiallySucceededBuilds: true
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
- task: ExtractFiles@1
|
||||
|
||||
@@ -9,6 +9,9 @@ parameters:
|
||||
- name: publish
|
||||
type: boolean
|
||||
default: true
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
steps:
|
||||
- task: ArchiveFiles@2
|
||||
@@ -17,7 +20,7 @@ steps:
|
||||
includeRootFolder: false
|
||||
archiveType: 'tar'
|
||||
tarCompression: 'gz'
|
||||
archiveFile: '$(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.SourceBranchName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.artifactName }}.tar.gz'
|
||||
archiveFile: '$(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.SourceBranchName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.artifactName }}_${{ parameters.gpuTarget }}.tar.gz'
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Cleanup Staging Area'
|
||||
inputs:
|
||||
|
||||
@@ -21,6 +21,9 @@ parameters:
|
||||
- name: fixedComponentName
|
||||
type: string
|
||||
default: ''
|
||||
- name: latestFromBranch
|
||||
type: boolean
|
||||
default: true
|
||||
# match case of the repo in this object for the left side of the maps
|
||||
# should not need to replace these parameters
|
||||
- name: stagingPipelineIdentifiers
|
||||
@@ -36,6 +39,7 @@ parameters:
|
||||
HIP: $(hip-pipeline-id)
|
||||
hipBLAS: $(hipblas-pipeline-id)
|
||||
hipBLASLt: $(hipblaslt-pipeline-id)
|
||||
hipBLAS-common: $(hipblas-common-pipeline-id)
|
||||
hipCUB: $(hipcub-pipeline-id)
|
||||
hipFFT: $(hipfft-pipeline-id)
|
||||
hipfort: $(hipfort-pipeline-id)
|
||||
@@ -91,6 +95,7 @@ parameters:
|
||||
HIP: $(hip-tagged-pipeline-id)
|
||||
hipBLAS: $(hipblas-tagged-pipeline-id)
|
||||
hipBLASLt: $(hipblaslt-tagged-pipeline-id)
|
||||
hipBLAS-common: $(hipblas-common-tagged-pipeline-id)
|
||||
hipCUB: $(hipcub-tagged-pipeline-id)
|
||||
hipFFT: $(hipfft-tagged-pipeline-id)
|
||||
hipfort: $(hipfort-tagged-pipeline-id)
|
||||
@@ -138,31 +143,106 @@ parameters:
|
||||
- name: skipLibraryLinking
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if llvm-project is not downloaded in a particular call
|
||||
# or if you just don't want the symlink
|
||||
- name: skipLlvmSymlink
|
||||
type: boolean
|
||||
default: false
|
||||
# some ROCm components can specify GPU target and this will affect downloads
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
# array of ROCm components that can specify GPU target or have dependency of such a component
|
||||
# these components would have parallel build jobs per GPU target so they produce multiple artifacts
|
||||
# only download artifact tied to the GPU target
|
||||
- name: componentsWithGPUTarget
|
||||
type: object
|
||||
default:
|
||||
- AMDMIGraphX
|
||||
- composable_kernel
|
||||
- hipBLASLt
|
||||
- hipCUB
|
||||
- hipFFT
|
||||
- hipRAND
|
||||
- hipSPARSELt
|
||||
- hipTensor
|
||||
- rccl
|
||||
- rocALUTION
|
||||
- rocBLAS
|
||||
- rocFFT
|
||||
- rocm-examples
|
||||
- rocPRIM
|
||||
- rocprofiler
|
||||
- rocRAND
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
- rocThrust
|
||||
- roctracer
|
||||
- rocWMMA
|
||||
- rpp
|
||||
# list below do not have flag for gpu target but have dependencies of components that do, so build separately per gpu target
|
||||
- hipBLAS
|
||||
- hipSOLVER
|
||||
- hipSPARSE
|
||||
- MIOpen
|
||||
- MIVision
|
||||
- rocAL
|
||||
- ROCmValidationSuite
|
||||
|
||||
steps:
|
||||
# assuming artifact-download.yml template file in same directory
|
||||
# for the case where rocm dependency item in list has a colon (:)
|
||||
# assume it is of the format of componentName:fileFilter
|
||||
# fileFilter could contain both a subcomponent name or gpu name separated by asterisks
|
||||
# gpu name will be specified by parameters.gpuTarget for components that are in componentsWithGPUTarget
|
||||
# e.g., gfx942 to only download artifacts from component for this gpu if applicable
|
||||
- ${{ each dependency in parameters.dependencyList }}:
|
||||
- ${{ if eq(parameters.dependencySource, 'staging') }}:
|
||||
- ${{ if contains(dependency, ':') }}:
|
||||
- template: artifact-download.yml
|
||||
parameters:
|
||||
componentName: ${{ split(dependency, ':')[0] }}
|
||||
extractToMnt: ${{ parameters.extractToMnt }}
|
||||
${{ if eq(parameters.dependencySource, 'staging') }}:
|
||||
pipelineId: ${{ parameters.stagingPipelineIdentifiers[ split(dependency, ':')[0] ] }}
|
||||
latestFromBranch: ${{ parameters.latestFromBranch }}
|
||||
${{ elseif eq(parameters.dependencySource, 'tag-builds') }}:
|
||||
pipelineId: ${{ parameters.taggedPipelineIdentifiers[ split(dependency, ':')[0] ] }}
|
||||
latestFromBranch: false
|
||||
${{ if containsValue( parameters.componentsWithGPUTarget, split(dependency, ':')[0] ) }}:
|
||||
fileFilter: "${{ split(dependency, ':')[1] }}*${{ parameters.gpuTarget }}"
|
||||
${{ else }}:
|
||||
fileFilter: ${{ split(dependency, ':')[1] }}
|
||||
# no colon (:) found in this item in the list
|
||||
- ${{ else }}:
|
||||
- template: artifact-download.yml
|
||||
parameters:
|
||||
componentName: ${{ dependency }}
|
||||
pipelineId: ${{ parameters.stagingPipelineIdentifiers[dependency] }}
|
||||
extractToMnt: ${{ parameters.extractToMnt }}
|
||||
- ${{ if eq(parameters.dependencySource, 'tag-builds') }}:
|
||||
- template: artifact-download.yml
|
||||
parameters:
|
||||
componentName: ${{ dependency }}
|
||||
pipelineId: ${{ parameters.taggedPipelineIdentifiers[dependency] }}
|
||||
extractToMnt: ${{ parameters.extractToMnt }}
|
||||
${{ if eq(parameters.dependencySource, 'staging') }}:
|
||||
pipelineId: ${{ parameters.stagingPipelineIdentifiers[dependency] }}
|
||||
latestFromBranch: ${{ parameters.latestFromBranch }}
|
||||
${{ elseif eq(parameters.dependencySource, 'tag-builds') }}:
|
||||
pipelineId: ${{ parameters.taggedPipelineIdentifiers[dependency] }}
|
||||
latestFromBranch: false
|
||||
${{ if containsValue( parameters.componentsWithGPUTarget, dependency ) }}:
|
||||
fileFilter: ${{ parameters.gpuTarget }}
|
||||
# fixed case only accepts one component at a time, so no array input
|
||||
- ${{ if eq(parameters.dependencySource, 'fixed') }}:
|
||||
- template: artifact-download.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.fixedComponentName }}
|
||||
pipelineId: ${{ parameters.fixedPipelineIdentifier }}
|
||||
latestFromBranch: false
|
||||
extractToMnt: ${{ parameters.extractToMnt }}
|
||||
# Set link to redirect llvm folder
|
||||
- ${{ if eq(parameters.skipLlvmSymlink, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: Symlink from rocm/llvm to rocm/lib/llvm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- task: Bash@3
|
||||
displayName: 'list downloaded ROCm files'
|
||||
displayName: 'List downloaded ROCm files'
|
||||
inputs:
|
||||
targetType: inline
|
||||
${{ if eq(parameters.extractToMnt, true) }}:
|
||||
@@ -171,7 +251,7 @@ steps:
|
||||
script: ls -1R $(Agent.BuildDirectory)/rocm
|
||||
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'link ROCm shared libraries'
|
||||
displayName: 'Link ROCm shared libraries'
|
||||
inputs:
|
||||
targetType: inline
|
||||
# OS ignores if the ROCm lib folder shows up more than once
|
||||
|
||||
@@ -12,7 +12,7 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 --version
|
||||
- script: pip list
|
||||
- script: pip list -v
|
||||
displayName: 'list python packages'
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Cleanup checkout space'
|
||||
|
||||
@@ -26,15 +26,17 @@ ATI
|
||||
AddressSanitizer
|
||||
AlexNet
|
||||
Arb
|
||||
Autocast
|
||||
BARs
|
||||
BLAS
|
||||
BMC
|
||||
BitCode
|
||||
Blit
|
||||
Blockwise
|
||||
Bluefield
|
||||
Bootloader
|
||||
CCD
|
||||
CDNA
|
||||
CHTML
|
||||
CIFAR
|
||||
CLI
|
||||
CLion
|
||||
@@ -67,6 +69,8 @@ CommonMark
|
||||
Concretized
|
||||
Conda
|
||||
ConnectX
|
||||
CuPy
|
||||
Dashboarding
|
||||
DDR
|
||||
DF
|
||||
DGEMM
|
||||
@@ -86,6 +90,7 @@ DataLoader
|
||||
DataParallel
|
||||
DeepSpeed
|
||||
Dependabot
|
||||
Deprecations
|
||||
DevCap
|
||||
Dockerfile
|
||||
Doxygen
|
||||
@@ -93,6 +98,7 @@ ELMo
|
||||
ENDPGM
|
||||
EPYC
|
||||
ESXi
|
||||
EoS
|
||||
FFT
|
||||
FFTs
|
||||
FFmpeg
|
||||
@@ -143,6 +149,7 @@ HPCG
|
||||
HPE
|
||||
HPL
|
||||
HSA
|
||||
HW
|
||||
HWE
|
||||
HWS
|
||||
Haswell
|
||||
@@ -166,12 +173,14 @@ ImageNet
|
||||
InfiniBand
|
||||
Inlines
|
||||
IntelliSense
|
||||
Interop
|
||||
Intersphinx
|
||||
Intra
|
||||
Ioffe
|
||||
JSON
|
||||
Jupyter
|
||||
KFD
|
||||
KFDTest
|
||||
KiB
|
||||
KV
|
||||
KVM
|
||||
@@ -208,12 +217,14 @@ MVFFR
|
||||
Makefile
|
||||
Makefiles
|
||||
Matplotlib
|
||||
Matrox
|
||||
Megatrends
|
||||
Megatron
|
||||
Mellanox
|
||||
Mellanox's
|
||||
Meta's
|
||||
MirroredStrategy
|
||||
Mixtral
|
||||
Multicore
|
||||
Multithreaded
|
||||
MyEnvironment
|
||||
@@ -242,6 +253,7 @@ OAMs
|
||||
OCP
|
||||
OEM
|
||||
OFED
|
||||
OMM
|
||||
OMP
|
||||
OMPI
|
||||
OMPT
|
||||
@@ -257,6 +269,9 @@ OpenMP
|
||||
OpenMPI
|
||||
OpenSSL
|
||||
OpenVX
|
||||
OpenXLA
|
||||
Oversubscription
|
||||
PagedAttention
|
||||
PCC
|
||||
PCI
|
||||
PCIe
|
||||
@@ -273,16 +288,22 @@ PerfDb
|
||||
Perfetto
|
||||
PipelineParallel
|
||||
PnP
|
||||
PowerEdge
|
||||
PowerShell
|
||||
PyPi
|
||||
PyTorch
|
||||
Qcycles
|
||||
Qwen
|
||||
RAII
|
||||
RAS
|
||||
RCCL
|
||||
RDC
|
||||
RDMA
|
||||
RDNA
|
||||
README
|
||||
RHEL
|
||||
RNN
|
||||
RNNs
|
||||
ROC
|
||||
ROCProfiler
|
||||
ROCTracer
|
||||
@@ -294,6 +315,7 @@ ROCm
|
||||
ROCmCC
|
||||
ROCmSoftwarePlatform
|
||||
ROCmValidationSuite
|
||||
ROCprofiler
|
||||
ROCr
|
||||
RST
|
||||
RW
|
||||
@@ -368,6 +390,7 @@ UC
|
||||
UCC
|
||||
UCX
|
||||
UIF
|
||||
UMC
|
||||
USM
|
||||
UTCL
|
||||
UTIL
|
||||
@@ -413,6 +436,7 @@ accuracies
|
||||
activations
|
||||
addr
|
||||
alloc
|
||||
allocatable
|
||||
allocator
|
||||
allocators
|
||||
amdgpu
|
||||
@@ -427,8 +451,10 @@ backends
|
||||
benchmarking
|
||||
bfloat
|
||||
bilinear
|
||||
bitcode
|
||||
bitsandbytes
|
||||
blit
|
||||
bootloader
|
||||
boson
|
||||
bosons
|
||||
buildable
|
||||
@@ -451,8 +477,10 @@ composable
|
||||
concretization
|
||||
config
|
||||
conformant
|
||||
constructible
|
||||
convolutional
|
||||
convolves
|
||||
copyable
|
||||
cpp
|
||||
csn
|
||||
cuBLAS
|
||||
@@ -474,6 +502,8 @@ denoise
|
||||
denoised
|
||||
denoises
|
||||
denormalize
|
||||
dequantization
|
||||
dequantizes
|
||||
deserializers
|
||||
detections
|
||||
dev
|
||||
@@ -485,8 +515,9 @@ distro
|
||||
el
|
||||
embeddings
|
||||
enablement
|
||||
endpgm
|
||||
encodings
|
||||
endpgm
|
||||
enqueue
|
||||
env
|
||||
epilog
|
||||
etcetera
|
||||
@@ -497,6 +528,7 @@ ffmpeg
|
||||
filesystem
|
||||
fortran
|
||||
fp
|
||||
gRPC
|
||||
galb
|
||||
gcc
|
||||
gdb
|
||||
@@ -504,6 +536,7 @@ gfortran
|
||||
gfx
|
||||
githooks
|
||||
github
|
||||
globals
|
||||
gnupg
|
||||
grayscale
|
||||
gzip
|
||||
@@ -527,12 +560,14 @@ hipfort
|
||||
hipify
|
||||
hipsolver
|
||||
hipsparse
|
||||
hlist
|
||||
hotspotting
|
||||
hpc
|
||||
hpp
|
||||
hsa
|
||||
hsakmt
|
||||
hyperparameter
|
||||
iDRAC
|
||||
ib_core
|
||||
inband
|
||||
incrementing
|
||||
@@ -543,11 +578,13 @@ init
|
||||
initializer
|
||||
inlining
|
||||
installable
|
||||
interop
|
||||
interprocedural
|
||||
intra
|
||||
invariants
|
||||
invocating
|
||||
ipo
|
||||
jax
|
||||
kdb
|
||||
latencies
|
||||
libfabric
|
||||
@@ -567,9 +604,11 @@ migraphx
|
||||
miopen
|
||||
miopengemm
|
||||
mivisionx
|
||||
mjx
|
||||
mkdir
|
||||
mlirmiopen
|
||||
mtypes
|
||||
mutex
|
||||
mvffr
|
||||
namespace
|
||||
namespaces
|
||||
@@ -592,25 +631,35 @@ pragma
|
||||
pre
|
||||
prebuilt
|
||||
precompiled
|
||||
preconditioner
|
||||
preconfigured
|
||||
prefetch
|
||||
prefetchable
|
||||
prefill
|
||||
prefills
|
||||
preloaded
|
||||
preprocess
|
||||
preprocessed
|
||||
preprocessing
|
||||
preprocessor
|
||||
prequantized
|
||||
prerequisites
|
||||
profiler
|
||||
profilers
|
||||
protobuf
|
||||
pseudorandom
|
||||
py
|
||||
quantile
|
||||
quantizer
|
||||
quasirandom
|
||||
queueing
|
||||
rccl
|
||||
rdc
|
||||
reStructuredText
|
||||
redirections
|
||||
refactorization
|
||||
reformats
|
||||
repo
|
||||
repos
|
||||
representativeness
|
||||
req
|
||||
@@ -622,10 +671,12 @@ roc
|
||||
rocAL
|
||||
rocALUTION
|
||||
rocBLAS
|
||||
rocDecode
|
||||
rocFFT
|
||||
rocLIB
|
||||
rocMLIR
|
||||
rocPRIM
|
||||
rocPyDecode
|
||||
rocRAND
|
||||
rocSOLVER
|
||||
rocSPARSE
|
||||
@@ -663,11 +714,14 @@ spack
|
||||
src
|
||||
stochastically
|
||||
strided
|
||||
subcommand
|
||||
subdirectory
|
||||
subexpression
|
||||
subfolder
|
||||
subfolders
|
||||
submodule
|
||||
supercomputing
|
||||
symlink
|
||||
td
|
||||
tensorfloat
|
||||
th
|
||||
@@ -687,6 +741,7 @@ txt
|
||||
uarch
|
||||
uncached
|
||||
uncorrectable
|
||||
unhandled
|
||||
uninstallation
|
||||
unsqueeze
|
||||
unstacking
|
||||
@@ -708,10 +763,13 @@ vectorized
|
||||
vectorizer
|
||||
vectorizes
|
||||
vjxb
|
||||
voxel
|
||||
walkthrough
|
||||
walkthroughs
|
||||
watchpoints
|
||||
wavefront
|
||||
wavefronts
|
||||
whitespace
|
||||
whitespaces
|
||||
workgroup
|
||||
workgroups
|
||||
|
||||
1971
RELEASE.md
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.1.2"
|
||||
<default revision="refs/tags/rocm-6.2.0"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
@@ -10,14 +10,17 @@
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="ROCT-Thunk-Interface" />
|
||||
<project name="amdsmi" />
|
||||
<project name="clang-ocl" />
|
||||
<project name="omniperf" />
|
||||
<project name="omnitrace" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
<project name="rocm-core" />
|
||||
<project name="rocm-examples" />
|
||||
<project name="rocminfo" />
|
||||
<project name="rocprofiler" />
|
||||
<project name="rocprofiler-register" />
|
||||
<project name="rocprofiler-sdk" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="HIP" />
|
||||
@@ -51,9 +54,11 @@
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocBLAS" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocFFT" />
|
||||
<project groups="mathlibs" name="rocPRIM" />
|
||||
<project groups="mathlibs" name="rocRAND" />
|
||||
|
||||
@@ -1,482 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="OpenMP support in ROCm">
|
||||
<meta name="keywords" content="OpenMP, LLVM, OpenMP toolchain">
|
||||
</head>
|
||||
|
||||
# OpenMP support in ROCm
|
||||
|
||||
## Introduction
|
||||
|
||||
The ROCm™ installation includes an LLVM-based implementation that fully supports
|
||||
the OpenMP 4.5 standard and a subset of OpenMP 5.0, 5.1, and 5.2 standards.
|
||||
Fortran, C/C++ compilers, and corresponding runtime libraries are included.
|
||||
Along with host APIs, the OpenMP compilers support offloading code and data onto
|
||||
GPU devices. This document briefly describes the installation location of the
|
||||
OpenMP toolchain, example usage of device offloading, and usage of `rocprof`
|
||||
with OpenMP applications. The GPUs supported are the same as those supported by
|
||||
this ROCm release. See the list of supported GPUs for {doc}`Linux<rocm-install-on-linux:reference/system-requirements>` and
|
||||
{doc}`Windows<rocm-install-on-windows:reference/system-requirements>`.
|
||||
|
||||
The ROCm OpenMP compiler is implemented using LLVM compiler technology.
|
||||
The following image illustrates the internal steps taken to translate a user’s application into an executable that can offload computation to the AMDGPU. The compilation is a two-pass process. Pass 1 compiles the application to generate the CPU code and Pass 2 links the CPU code to the AMDGPU device code.
|
||||
|
||||

|
||||
|
||||
### Installation
|
||||
|
||||
The OpenMP toolchain is automatically installed as part of the standard ROCm
|
||||
installation and is available under `/opt/rocm-{version}/llvm`. The
|
||||
sub-directories are:
|
||||
|
||||
* bin: Compilers (`flang` and `clang`) and other binaries.
|
||||
* examples: The usage section below shows how to compile and run these programs.
|
||||
* include: Header files.
|
||||
* lib: Libraries including those required for target offload.
|
||||
* lib-debug: Debug versions of the above libraries.
|
||||
|
||||
## OpenMP: usage
|
||||
|
||||
The example programs can be compiled and run by pointing the environment
|
||||
variable `ROCM_PATH` to the ROCm install directory.
|
||||
|
||||
**Example:**
|
||||
|
||||
```bash
|
||||
export ROCM_PATH=/opt/rocm-{version}
|
||||
cd $ROCM_PATH/share/openmp-extras/examples/openmp/veccopy
|
||||
sudo make run
|
||||
```
|
||||
|
||||
:::{note}
|
||||
`sudo` is required since we are building inside the `/opt` directory.
|
||||
Alternatively, copy the files to your home directory first.
|
||||
:::
|
||||
|
||||
The above invocation of Make compiles and runs the program. Note the options
|
||||
that are required for target offload from an OpenMP program:
|
||||
|
||||
```bash
|
||||
-fopenmp --offload-arch=<gpu-arch>
|
||||
```
|
||||
|
||||
:::{note}
|
||||
The compiler also accepts the alternative offloading notation:
|
||||
|
||||
```bash
|
||||
-fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=<gpu-arch>
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
Obtain the value of `gpu-arch` by running the following command:
|
||||
|
||||
```bash
|
||||
% /opt/rocm-{version}/bin/rocminfo | grep gfx
|
||||
```
|
||||
|
||||
[//]: # (dated link below, needs updating)
|
||||
|
||||
See the complete list of [compiler command-line references](https://github.com/ROCm/llvm-project/blob/amd-staging/openmp/docs/CommandLineArgumentReference.rst).
|
||||
|
||||
### Using `rocprof` with OpenMP
|
||||
|
||||
The following steps describe a typical workflow for using `rocprof` with OpenMP
|
||||
code compiled with AOMP:
|
||||
|
||||
1. Run `rocprof` with the program command line:
|
||||
|
||||
```bash
|
||||
% rocprof <application> <args>
|
||||
```
|
||||
|
||||
This produces a `results.csv` file in the user’s current directory that
|
||||
shows basic stats such as kernel names, grid size, number of registers used,
|
||||
etc. The user can choose to specify the preferred output file name using the
|
||||
o option.
|
||||
|
||||
2. Add options for a detailed result:
|
||||
|
||||
```bash
|
||||
--stats: % rocprof --stats <application> <args>
|
||||
```
|
||||
|
||||
The stats option produces timestamps for the kernels. Look into the output
|
||||
CSV file for the field, `DurationNs`, which is useful in getting an
|
||||
understanding of the critical kernels in the code.
|
||||
|
||||
Apart from `--stats`, the option `--timestamp` on produces a timestamp for
|
||||
the kernels.
|
||||
|
||||
3. After learning about the required kernels, the user can take a detailed look
|
||||
at each one of them. `rocprof` has support for hardware counters: a set of
|
||||
basic and a set of derived ones. See the complete list of counters using
|
||||
options --list-basic and --list-derived. `rocprof` accepts either a text or
|
||||
an XML file as an input.
|
||||
|
||||
For more details on `rocprof`, refer to the {doc}`ROCProfilerV1 User Manual <rocprofiler:rocprofv1>`.
|
||||
|
||||
### Using tracing options
|
||||
|
||||
**Prerequisite:** When using the `--sys-trace` option, compile the OpenMP
|
||||
program with:
|
||||
|
||||
```bash
|
||||
-Wl,-rpath,/opt/rocm-{version}/lib -lamdhip64
|
||||
```
|
||||
|
||||
The following tracing options are widely used to generate useful information:
|
||||
|
||||
* **`--hsa-trace`**: This option is used to get a JSON output file with the HSA
|
||||
API execution traces and a flat profile in a CSV file.
|
||||
|
||||
* **`--sys-trace`**: This allows programmers to trace both HIP and HSA calls.
|
||||
Since this option results in loading ``libamdhip64.so``, follow the
|
||||
prerequisite as mentioned above.
|
||||
|
||||
A CSV and a JSON file are produced by the above trace options. The CSV file
|
||||
presents the data in a tabular format, and the JSON file can be visualized using
|
||||
Google Chrome at chrome://tracing/ or [Perfetto](https://perfetto.dev/).
|
||||
Navigate to Chrome or Perfetto and load the JSON file to see the timeline of the
|
||||
HSA calls.
|
||||
|
||||
For more details on tracing, refer to the {doc}`ROCProfilerV1 User Manual <rocprofiler:rocprofv1>`.
|
||||
|
||||
### Environment variables
|
||||
|
||||
:::{table}
|
||||
:widths: auto
|
||||
| Environment Variable | Purpose |
|
||||
| --------------------------- | ---------------------------- |
|
||||
| `OMP_NUM_TEAMS` | To set the number of teams for kernel launch, which is otherwise chosen by the implementation by default. You can set this number (subject to implementation limits) for performance tuning. |
|
||||
| `LIBOMPTARGET_KERNEL_TRACE` | To print useful statistics for device operations. Setting it to 1 and running the program emits the name of every kernel launched, the number of teams and threads used, and the corresponding register usage. Setting it to 2 additionally emits timing information for kernel launches and data transfer operations between the host and the device. |
|
||||
| `LIBOMPTARGET_INFO` | To print informational messages from the device runtime as the program executes. Setting it to a value of 1 or higher, prints fine-grain information and setting it to -1 prints complete information. |
|
||||
| `LIBOMPTARGET_DEBUG` | To get detailed debugging information about data transfer operations and kernel launch when using a debug version of the device library. Set this environment variable to 1 to get the detailed information from the library. |
|
||||
| `GPU_MAX_HW_QUEUES` | To set the number of HSA queues in the OpenMP runtime. The HSA queues are created on demand up to the maximum value as supplied here. The queue creation starts with a single initialized queue to avoid unnecessary allocation of resources. The provided value is capped if it exceeds the recommended, device-specific value. |
|
||||
| `LIBOMPTARGET_AMDGPU_MAX_ASYNC_COPY_BYTES` | To set the threshold size up to which data transfers are initiated asynchronously. The default threshold size is 1*1024*1024 bytes (1MB). |
|
||||
| `OMPX_FORCE_SYNC_REGIONS` | To force the runtime to execute all operations synchronously, i.e., wait for an operation to complete immediately. This affects data transfers and kernel execution. While it is mainly designed for debugging, it may have a minor positive effect on performance in certain situations. |
|
||||
:::
|
||||
|
||||
## OpenMP: features
|
||||
|
||||
The OpenMP programming model is greatly enhanced with the following new features
|
||||
implemented in the past releases.
|
||||
|
||||
(openmp_usm)=
|
||||
|
||||
### Asynchronous behavior in OpenMP target regions
|
||||
|
||||
* Controlling Asynchronous Behavior
|
||||
|
||||
The OpenMP offloading runtime executes in an asynchronous fashion by default, allowing multiple data transfers to start concurrently. However, if the data to be transferred becomes larger than the default threshold of 1MB, the runtime falls back to a synchronous data transfer. The buffers that have been locked already are always executed asynchronously.
|
||||
You can overrule this default behavior by setting `LIBOMPTARGET_AMDGPU_MAX_ASYNC_COPY_BYTES` and `OMPX_FORCE_SYNC_REGIONS`. See the [Environment Variables](#environment-variables) table for details.
|
||||
|
||||
* Multithreaded Offloading on the Same Device
|
||||
|
||||
The `libomptarget` plugin for GPU offloading allows creation of separate configurable HSA queues per chiplet, which enables two or more threads to concurrently offload to the same device.
|
||||
|
||||
* Parallel Memory Copy Invocations
|
||||
|
||||
Implicit asynchronous execution of single target region enables parallel memory copy invocations.
|
||||
|
||||
### Unified shared memory
|
||||
|
||||
Unified Shared Memory (USM) provides a pointer-based approach to memory
|
||||
management. To implement USM, fulfill the following system requirements along
|
||||
with Xnack capability.
|
||||
|
||||
#### Prerequisites
|
||||
|
||||
* Linux Kernel versions above 5.14
|
||||
* Latest KFD driver packaged in ROCm stack
|
||||
* Xnack, as USM support can only be tested with applications compiled with Xnack
|
||||
capability
|
||||
|
||||
#### Xnack capability
|
||||
|
||||
When enabled, Xnack capability allows GPU threads to access CPU (system) memory,
|
||||
allocated with OS-allocators, such as `malloc`, `new`, and `mmap`. Xnack must be
|
||||
enabled both at compile- and run-time. To enable Xnack support at compile-time,
|
||||
use:
|
||||
|
||||
```bash
|
||||
--offload-arch=gfx908:xnack+
|
||||
```
|
||||
|
||||
Or use another functionally equivalent option Xnack-any:
|
||||
|
||||
```bash
|
||||
--offload-arch=gfx908
|
||||
```
|
||||
|
||||
To enable Xnack functionality at runtime on a per-application basis,
|
||||
use environment variable:
|
||||
|
||||
```bash
|
||||
HSA_XNACK=1
|
||||
```
|
||||
|
||||
When Xnack support is not needed:
|
||||
|
||||
* Build the applications to maximize resource utilization using:
|
||||
|
||||
```bash
|
||||
--offload-arch=gfx908:xnack-
|
||||
```
|
||||
|
||||
* At runtime, set the `HSA_XNACK` environment variable to 0.
|
||||
|
||||
#### Unified shared memory pragma
|
||||
|
||||
This OpenMP pragma is available on MI200 through `xnack+` support.
|
||||
|
||||
```bash
|
||||
omp requires unified_shared_memory
|
||||
```
|
||||
|
||||
As stated in the OpenMP specifications, this pragma makes the map clause on
|
||||
target constructs optional. By default, on MI200, all memory allocated on the
|
||||
host is fine grain. Using the map clause on a target clause is allowed, which
|
||||
transforms the access semantics of the associated memory to coarse grain.
|
||||
|
||||
```bash
|
||||
A simple program demonstrating the use of this feature is:
|
||||
$ cat parallel_for.cpp
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define N 64
|
||||
#pragma omp requires unified_shared_memory
|
||||
int main() {
|
||||
int n = N;
|
||||
int *a = new int[n];
|
||||
int *b = new int[n];
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
b[i] = i;
|
||||
|
||||
#pragma omp target parallel for map(to:b[:n])
|
||||
for(int i = 0; i < n; i++)
|
||||
a[i] = b[i];
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
if(a[i] != i)
|
||||
printf("error at %d: expected %d, got %d\n", i, i+1, a[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
$ clang++ -O2 -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:xnack+ parallel_for.cpp
|
||||
$ HSA_XNACK=1 ./a.out
|
||||
```
|
||||
|
||||
In the above code example, pointer “a” is not mapped in the target region, while
|
||||
pointer “b” is. Both are valid pointers on the GPU device and passed by-value to
|
||||
the kernel implementing the target region. This means the pointer values on the
|
||||
host and the device are the same.
|
||||
|
||||
The difference between the memory pages pointed to by these two variables is
|
||||
that the pages pointed by “a” are in fine-grain memory, while the pages pointed
|
||||
to by “b” are in coarse-grain memory during and after the execution of the
|
||||
target region. This is accomplished in the OpenMP runtime library with calls to
|
||||
the ROCr runtime to set the pages pointed by “b” as coarse grain.
|
||||
|
||||
### OMPT target support
|
||||
|
||||
The OpenMP runtime in ROCm implements a subset of the OMPT device APIs, as
|
||||
described in the OpenMP specification document. These APIs allow first-party
|
||||
tools to examine the profile and kernel traces that execute on a device. A tool
|
||||
can register callbacks for data transfer and kernel dispatch entry points or use
|
||||
APIs to start and stop tracing for device-related activities such as data
|
||||
transfer and kernel dispatch timings and associated metadata. If device tracing
|
||||
is enabled, trace records for device activities are collected during program
|
||||
execution and returned to the tool using the APIs described in the
|
||||
specification.
|
||||
|
||||
The following example demonstrates how a tool uses the supported OMPT target
|
||||
APIs. The `README` in `/opt/rocm/llvm/examples/tools/ompt` outlines the steps to
|
||||
be followed, and the provided example can be run as shown below:
|
||||
|
||||
```bash
|
||||
cd $ROCM_PATH/share/openmp-extras/examples/tools/ompt/veccopy-ompt-target-tracing
|
||||
sudo make run
|
||||
```
|
||||
|
||||
The file `veccopy-ompt-target-tracing.c` simulates how a tool initiates device
|
||||
activity tracing. The file `callbacks.h` shows the callbacks registered and
|
||||
implemented by the tool.
|
||||
|
||||
### Floating point atomic operations
|
||||
|
||||
The MI200-series GPUs support the generation of hardware floating-point atomics
|
||||
using the OpenMP atomic pragma. The support includes single- and
|
||||
double-precision floating-point atomic operations. The programmer must ensure
|
||||
that the memory subjected to the atomic operation is in coarse-grain memory by
|
||||
mapping it explicitly with the help of map clauses when not implicitly mapped by
|
||||
the compiler as per the [OpenMP
|
||||
specifications](https://www.openmp.org/specifications/). This makes these
|
||||
hardware floating-point atomic instructions “fast,” as they are faster than
|
||||
using a default compare-and-swap loop scheme, but at the same time “unsafe,” as
|
||||
they are not supported on fine-grain memory. The operation in
|
||||
`unified_shared_memory` mode also requires programmers to map the memory
|
||||
explicitly when not implicitly mapped by the compiler.
|
||||
|
||||
To request fast floating-point atomic instructions at the file level, use
|
||||
compiler flag `-munsafe-fp-atomics` or a hint clause on a specific pragma:
|
||||
|
||||
```bash
|
||||
double a = 0.0;
|
||||
#pragma omp atomic hint(AMD_fast_fp_atomics)
|
||||
a = a + 1.0;
|
||||
```
|
||||
|
||||
:::{note}
|
||||
`AMD_unsafe_fp_atomics` is an alias for `AMD_fast_fp_atomics`, and
|
||||
`AMD_safe_fp_atomics` is implemented with a compare-and-swap loop.
|
||||
:::
|
||||
|
||||
To disable the generation of fast floating-point atomic instructions at the file
|
||||
level, build using the option `-msafe-fp-atomics` or use a hint clause on a
|
||||
specific pragma:
|
||||
|
||||
```bash
|
||||
double a = 0.0;
|
||||
#pragma omp atomic hint(AMD_safe_fp_atomics)
|
||||
a = a + 1.0;
|
||||
```
|
||||
|
||||
The hint clause value always has a precedence over the compiler flag, which
|
||||
allows programmers to create atomic constructs with a different behavior than
|
||||
the rest of the file.
|
||||
|
||||
See the example below, where the user builds the program using
|
||||
`-msafe-fp-atomics` to select a file-wide “safe atomic” compilation. However,
|
||||
the fast atomics hint clause over variable “a” takes precedence and operates on
|
||||
“a” using a fast/unsafe floating-point atomic, while the variable “b” in the
|
||||
absence of a hint clause is operated upon using safe floating-point atomics as
|
||||
per the compiler flag.
|
||||
|
||||
```bash
|
||||
double a = 0.0;.
|
||||
#pragma omp atomic hint(AMD_fast_fp_atomics)
|
||||
a = a + 1.0;
|
||||
|
||||
double b = 0.0;
|
||||
#pragma omp atomic
|
||||
b = b + 1.0;
|
||||
```
|
||||
|
||||
### AddressSanitizer tool
|
||||
|
||||
AddressSanitizer (ASan) is a memory error detector tool utilized by applications to
|
||||
detect various errors ranging from spatial issues such as out-of-bound access to
|
||||
temporal issues such as use-after-free. The AOMP compiler supports ASan for AMD
|
||||
GPUs with applications written in both HIP and OpenMP.
|
||||
|
||||
**Features supported on host platform (Target x86_64):**
|
||||
|
||||
* Use-after-free
|
||||
* Buffer overflows
|
||||
* Heap buffer overflow
|
||||
* Stack buffer overflow
|
||||
* Global buffer overflow
|
||||
* Use-after-return
|
||||
* Use-after-scope
|
||||
* Initialization order bugs
|
||||
|
||||
**Features supported on AMDGPU platform (`amdgcn-amd-amdhsa`):**
|
||||
|
||||
* Heap buffer overflow
|
||||
* Global buffer overflow
|
||||
|
||||
**Software (kernel/OS) requirements:** Unified Shared Memory support with Xnack
|
||||
capability. See the section on [Unified Shared Memory](#unified-shared-memory)
|
||||
for prerequisites and details on Xnack.
|
||||
|
||||
**Example:**
|
||||
|
||||
* Heap buffer overflow
|
||||
|
||||
```bash
|
||||
void main() {
|
||||
....... // Some program statements
|
||||
....... // Some program statements
|
||||
#pragma omp target map(to : A[0:N], B[0:N]) map(from: C[0:N])
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for(int i =0 ; i < N; i++){
|
||||
C[i+10] = A[i] + B[i];
|
||||
} // end of for loop
|
||||
}
|
||||
....... // Some program statements
|
||||
}// end of main
|
||||
```
|
||||
|
||||
See the complete sample code for heap buffer overflow
|
||||
[here](https://github.com/ROCm/aomp/blob/aomp-dev/examples/tools/asan/heap_buffer_overflow/openmp/vecadd-HBO.cpp).
|
||||
|
||||
* Global buffer overflow
|
||||
|
||||
```bash
|
||||
#pragma omp declare target
|
||||
int A[N],B[N],C[N];
|
||||
#pragma omp end declare target
|
||||
void main(){
|
||||
...... // some program statements
|
||||
...... // some program statements
|
||||
#pragma omp target data map(to:A[0:N],B[0:N]) map(from: C[0:N])
|
||||
{
|
||||
#pragma omp target update to(A,B)
|
||||
#pragma omp target parallel for
|
||||
for(int i=0; i<N; i++){
|
||||
C[i]=A[i*100]+B[i+22];
|
||||
} // end of for loop
|
||||
#pragma omp target update from(C)
|
||||
}
|
||||
........ // some program statements
|
||||
} // end of main
|
||||
```
|
||||
|
||||
See the complete sample code for global buffer overflow
|
||||
[here](https://github.com/ROCm/aomp/blob/aomp-dev/examples/tools/asan/global_buffer_overflow/openmp/vecadd-GBO.cpp).
|
||||
|
||||
### Clang compiler option for kernel optimization
|
||||
|
||||
You can use the clang compiler option `-fopenmp-target-fast` for kernel optimization if certain constraints implied by its component options are satisfied. `-fopenmp-target-fast` enables the following options:
|
||||
|
||||
* `-fopenmp-target-ignore-env-vars`: It enables code generation of specialized kernels including no-loop and Cross-team reductions.
|
||||
|
||||
* `-fopenmp-assume-no-thread-state`: It enables the compiler to assume that no thread in a parallel region modifies an Internal Control Variable (`ICV`), thus potentially reducing the device runtime code execution.
|
||||
|
||||
* `-fopenmp-assume-no-nested-parallelism`: It enables the compiler to assume that no thread in a parallel region encounters a parallel region, thus potentially reducing the device runtime code execution.
|
||||
|
||||
* `-O3` if no `-O*` is specified by the user.
|
||||
|
||||
### Specialized kernels
|
||||
|
||||
Clang will attempt to generate specialized kernels based on compiler options and OpenMP constructs. The following specialized kernels are supported:
|
||||
|
||||
* No-loop
|
||||
* Big-jump-loop
|
||||
* Cross-team reductions
|
||||
|
||||
To enable the generation of specialized kernels, follow these guidelines:
|
||||
|
||||
* Do not specify teams, threads, and schedule-related environment variables. The `num_teams` clause in an OpenMP target construct acts as an override and prevents the generation of the no-loop kernel. If the specification of `num_teams` clause is a user requirement then clang tries to generate the big-jump-loop kernel instead of the no-loop kernel.
|
||||
|
||||
* Assert the absence of the teams, threads, and schedule-related environment variables by adding the command-line option `-fopenmp-target-ignore-env-vars`.
|
||||
|
||||
* To automatically enable the specialized kernel generation, use `-Ofast` or `-fopenmp-target-fast` for compilation.
|
||||
|
||||
* To disable specialized kernel generation, use `-fno-openmp-target-ignore-env-vars`.
|
||||
|
||||
#### No-loop kernel generation
|
||||
|
||||
The no-loop kernel generation feature optimizes the compiler performance by generating a specialized kernel for certain OpenMP target constructs such as target teams distribute parallel for. The specialized kernel generation feature assumes every thread executes a single iteration of the user loop, which leads the runtime to launch a total number of GPU threads equal to or greater than the iteration space size of the target region loop. This allows the compiler to generate code for the loop body without an enclosing loop, resulting in reduced control-flow complexity and potentially better performance.
|
||||
|
||||
#### Big-jump-loop kernel generation
|
||||
|
||||
A no-loop kernel is not generated if the OpenMP teams construct uses a `num_teams` clause. Instead, the compiler attempts to generate a different specialized kernel called the big-jump-loop kernel. The compiler launches the kernel with a grid size determined by the number of teams specified by the OpenMP `num_teams` clause and the `blocksize` chosen either by the compiler or specified by the corresponding OpenMP clause.
|
||||
|
||||
#### Cross-team optimized reduction kernel generation
|
||||
|
||||
If the OpenMP construct has a reduction clause, the compiler attempts to generate optimized code by utilizing efficient cross-team communication. New APIs for cross-team reduction are implemented in the device runtime and are automatically generated by clang.
|
||||
@@ -25,66 +25,69 @@ additional licenses. Please review individual repositories for more information.
|
||||
<!-- spellcheck-disable -->
|
||||
| Component | License |
|
||||
|:---------------------|:-------------------------|
|
||||
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/develop/LICENSE.txt) |
|
||||
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
|
||||
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
|
||||
| [AMDMIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
|
||||
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
|
||||
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
|
||||
| [AMD Common Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/develop/LICENCE) |
|
||||
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
|
||||
| [hipamd](https://github.com/ROCm/clr/tree/develop/hipamd) | [MIT](https://github.com/ROCm/clr/blob/develop/hipamd/LICENSE.txt) |
|
||||
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/develop/opencl) | [MIT](https://github.com/ROCm/clr/blob/develop/opencl/LICENSE.txt) |
|
||||
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
|
||||
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/develop/LICENSE) |
|
||||
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
|
||||
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
|
||||
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
|
||||
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
|
||||
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
|
||||
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
|
||||
| [clang-ocl](https://github.com/ROCm/clang-ocl/) | [MIT](https://github.com/ROCm/clang-ocl/blob/master/LICENSE) |
|
||||
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
|
||||
| [ROCT-Thunk-Interface](https://github.com/ROCm/ROCT-Thunk-Interface/) | [MIT](https://github.com/ROCm/ROCT-Thunk-Interface/blob/master/LICENSE.md) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/master/LICENSE.txt) |
|
||||
| [ROCR Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
|
||||
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
|
||||
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/develop/LICENSE.txt) |
|
||||
| [hipamd](https://github.com/ROCm/clr/tree/develop/hipamd) | [MIT](https://github.com/ROCm/clr/blob/develop/hipamd/LICENSE.txt) |
|
||||
| [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
|
||||
| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
|
||||
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
|
||||
| [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
|
||||
| [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
|
||||
| [hipFORT](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
|
||||
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
|
||||
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
|
||||
| [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
|
||||
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
|
||||
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |
|
||||
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
|
||||
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
|
||||
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
|
||||
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
|
||||
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
|
||||
| [Omniperf](https://github.com/ROCm/omniperf) | [MIT](https://github.com/ROCm/omniperf/blob/main/LICENSE) |
|
||||
| [Omnitrace](https://github.com/ROCm/omnitrace) | [MIT](https://github.com/ROCm/omnitrace/blob/main/LICENSE) |
|
||||
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
|
||||
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
|
||||
| [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
|
||||
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
|
||||
| [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
|
||||
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
|
||||
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
|
||||
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
|
||||
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
|
||||
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
|
||||
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
|
||||
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
|
||||
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
|
||||
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/develop/LICENSE) |
|
||||
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
|
||||
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/develop/opencl) | [MIT](https://github.com/ROCm/clr/blob/develop/opencl/LICENSE.txt) |
|
||||
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
|
||||
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/develop/License.txt) |
|
||||
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
|
||||
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
|
||||
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
|
||||
| [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) | [MIT](https://github.com/ROCm/rocprofiler-sdk/blob/amd-mainline/LICENSE) |
|
||||
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE) |
|
||||
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
|
||||
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/master/LICENSE.txt) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
|
||||
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
|
||||
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
|
||||
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/develop/LICENSE) |
|
||||
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
|
||||
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
|
||||
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/develop/License.txt) |
|
||||
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/develop/LICENSE) |
|
||||
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
|
||||
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
|
||||
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
|
||||
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
|
||||
| [ROCT-Thunk-Interface](https://github.com/ROCm/ROCT-Thunk-Interface/) | [MIT](https://github.com/ROCm/ROCT-Thunk-Interface/blob/master/LICENSE.md) |
|
||||
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
|
||||
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
|
||||
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
|
||||
| [ROCmValidationSuite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
|
||||
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html)
|
||||
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
|
||||
|
||||
@@ -8,121 +8,172 @@ Compatibility matrix
|
||||
|
||||
Use this matrix to view the ROCm compatibility across successive major and minor releases.
|
||||
|
||||
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
|
||||
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.1.5", "6.1.2", "6.0.0"
|
||||
:header: "ROCm Version", "6.2.0", "6.1.2", "6.0.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`,"Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4, 22.04.3","Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4 [#red-hat94]_, 9.3, 9.2","RHEL 9.4 [#red-hat94]_, 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`, "Ubuntu 24.04","",""
|
||||
,"Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4","Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94]_, 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,CentOS 7.9,CentOS 7.9
|
||||
,Oracle Linux 8.9 [#oracle89]_,Oracle Linux 8.9 [#oracle89]_,
|
||||
,.. _architecture-support-compatibility-matrix,,
|
||||
:doc:`GFX Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
|
||||
,"Oracle Linux 8.9 [#oracle89]_","Oracle Linux 8.9 [#oracle89]_",""
|
||||
,".. _architecture-support-compatibility-matrix:",,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix,,
|
||||
:doc:`GFX Card <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
|
||||
,".. _gpu-support-compatibility-matrix:",,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
, gfx942 [#mi300_612]_, gfx942 [#mi300_612]_, gfx942 [#mi300_600]_
|
||||
,gfx942 [#mi300_620]_, gfx942 [#mi300_612]_, gfx942 [#mi300_600]_
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
ECOSYSTEM SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1"
|
||||
FRAMEWORK SUPPORT,".. _framework-support-compatibility-matrix:",,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <rocm-install-on-linux:install/3rd-party/jax-install>`,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.17.3,1.14.1
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
THIRD PARTY COMMS,".. _thirdpartycomms-support-compatibility-matrix:",,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.14.1,>=1.14.1
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.1.0,2.1.0,2.0.1
|
||||
CUB,2.1.0,2.1.0,2.0.1
|
||||
THIRD PARTY ALGORITHM,".. _thirdpartyalgorithm-support-compatibility-matrix:",,
|
||||
Thrust,2.2.0,2.1.0,2.0.1
|
||||
CUB,2.2.0,2.1.0,2.0.1
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
ML & COMPUTER VISION,".. _mllibs-support-compatibility-matrix:",,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.9.0,2.9.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.1.0,3.1.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,2.5.0,2.5.0,2.5.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.9.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.1.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,2.5.0,2.5.0
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.5.0,1.4.0
|
||||
:doc:`rocAL <rocal:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,N/A
|
||||
:doc:`RPP <rpp:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,N/A,N/A
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.18.6,2.18.6,2.18.3
|
||||
COMMUNICATION,".. _commlibs-support-compatibility-matrix:",,
|
||||
:doc:`RCCL <rccl:index>`,2.20.5,2.18.6,2.18.3
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
MATH LIBS,".. _mathlibs-support-compatibility-matrix:",,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.1.0,2.1.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.7.0,0.7.0,0.6.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.1.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.7.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.14,1.0.14,1.0.13
|
||||
:doc:`hipFORT <hipfort:index>`,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.1.1,2.1.1,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.0.1,3.0.1,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.0,0.2.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.1.1,3.1.1,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.1.2,4.1.2,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.27,1.0.27,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.0.1,3.0.1,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.25.0,3.25.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.1.2,3.1.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.4.0,1.4.0,1.3.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.1.1,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.0.1,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.1.1,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.0,4.1.2,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.28,1.0.27,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.0.1,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.25.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.1.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.5.0,1.4.0,1.3.0
|
||||
`Tensile <https://github.com/ROCm/Tensile>`_,4.40.0,4.40.0,4.39.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.1.0,3.1.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.2.0,1.2.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.1.0,3.1.0,3.0.0
|
||||
PRIMITIVES,".. _primitivelibs-support-compatibility-matrix:",,
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.1.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.2.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.1.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.0.1,3.0.1,3.0.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.1.40093,6.1.40093,6.1.32830
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.12.0,0.12.0,0.11.0
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.1.5,6.1.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240125.5.08,20240125.5.08,20231016.2.245
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41133,6.1.40093,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.0,6.1.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.1.4246,20240125.5.08,20231016.2.245
|
||||
,,,
|
||||
TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.5.1,24.5.1,23.4.2
|
||||
:doc:`HIPIFY <hipify:index>`,17.0.0.24193,17.0.0.24193,17.0.0.23483
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.71.0,0.71.0,0.71.0
|
||||
SYSTEM MGMT TOOLS,".. _tools-support-compatibility-matrix:",,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.2,24.5.1,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.0.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60105,2.0.60102,2.0.60000
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.3.0,0.3.0,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60105,4.1.60102,4.1.60000
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.2.0,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.0,rocm-6.1.2,rocm-6.0.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,N/A,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,N/A,N/A
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.1.0,14.1.0,13.2.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.2.0,7.2.0,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.1.5,rocm-6.1.2,rocm-6.0.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60200,2.0.60102,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60200,4.1.60102,4.1.60000
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.12.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.1.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.3.0,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3,2.0.3
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,17.0.0.24193,17.0.0.24193,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,17.0.0.24193,17.0.0.24193,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,17.0.0.24193,17.0.0.24193,17.0.0.23483
|
||||
COMPILERS,".. _compilers-support-compatibility-matrix:",,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.1.40093,6.1.40093,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.1.40093,6.1.40093,6.1.32830
|
||||
RUNTIMES,".. _runtime-support-compatibility-matrix:",,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41133,6.1.40093,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.2.41133,6.1.40093,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.13.0,1.12.0
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#Ubuntu220405] Preview support of Ubuntu 22.04.5 only.
|
||||
.. [#Ubuntu220405] Preview support of Ubuntu 22.04.5 only
|
||||
.. [#red-hat94] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#oracle89] **For ROCm 6.1.1** - Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_612] **For ROCm 6.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_600] **For ROCm 6.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9 and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#oracle89] Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_600] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
|
||||
..
|
||||
Footnotes and ref anchors in below historical tables should be appended with "-past-60", to differentiate from the
|
||||
footnote references in the above, latest, compatibility matrix. It also allows to easily find & replace.
|
||||
An easy way to work is to download the historical.CSV file, and update open it in excel. Then when content is ready,
|
||||
delete the columns you don't need, to build the current compatibility matrix to use in above table. Find & replace all
|
||||
instances of "-past-60" to make it ready for above table.
|
||||
|
||||
|
||||
.. _past-rocm-compatibility-matrix:
|
||||
|
||||
Past versions of ROCm compatibility matrix
|
||||
***************************************************
|
||||
|
||||
Expand for full historical view of:
|
||||
|
||||
.. dropdown:: ROCm 6.0 - Present
|
||||
|
||||
You can `download the entire .csv <../downloads/compatibility-matrix-historical-6.0.csv>`_ for offline reference.
|
||||
|
||||
.. csv-table::
|
||||
:file: ../data/reference/compatibility-matrix-historical-6.0.csv
|
||||
:widths: 20,10,10,10,10,10,10,10
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#Ubuntu220405-past-60] Preview support of Ubuntu 22.04.5 only
|
||||
.. [#red-hat94-past-60] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#oracle89-past-60] Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
|
||||
|
||||
@@ -410,7 +410,7 @@ description, refer to the corresponding library data type support page.
|
||||
- ❌/❌
|
||||
- ❌/❌
|
||||
*
|
||||
- rocRAND (:doc:`details <rocrand:data-type-support>`)
|
||||
- rocRAND (:doc:`details <rocrand:api-reference/data-type-support>`)
|
||||
- -/✅
|
||||
- -/✅
|
||||
- -/✅
|
||||
|
||||
@@ -65,7 +65,7 @@ This example is adapted from the PyTorch research hub page on [Inception V3](htt
|
||||
|
||||
Follow these steps:
|
||||
|
||||
1. Run the PyTorch ROCm-based Docker image or refer to the section {doc}`Installing PyTorch <rocm-install-on-linux:how-to/3rd-party/pytorch-install>` for setting up a PyTorch environment on ROCm.
|
||||
1. Run the PyTorch ROCm-based Docker image or refer to the section {doc}`Installing PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>` for setting up a PyTorch environment on ROCm.
|
||||
|
||||
```dockerfile
|
||||
docker run -it -v $HOME:/data --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 8G rocm/pytorch:latest
|
||||
@@ -155,7 +155,7 @@ The previous section focused on downloading and using the Inception V3 model for
|
||||
|
||||
Follow these steps:
|
||||
|
||||
1. Run the PyTorch ROCm Docker image or refer to the section {doc}`Installing PyTorch <rocm-install-on-linux:how-to/3rd-party/pytorch-install>` for setting up a PyTorch environment on ROCm.
|
||||
1. Run the PyTorch ROCm Docker image or refer to the section {doc}`Installing PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>` for setting up a PyTorch environment on ROCm.
|
||||
|
||||
```dockerfile
|
||||
docker pull rocm/pytorch:latest
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="ROCm compilers disambiguation">
|
||||
<meta name="keywords" content="compilers, compiler naming, AMD, ROCm">
|
||||
</head>
|
||||
|
||||
# ROCm compilers disambiguation
|
||||
|
||||
ROCm ships multiple compilers of varying origins and purposes. This article
|
||||
disambiguates compiler naming used throughout the documentation.
|
||||
|
||||
## Compiler terms
|
||||
|
||||
| Term | Description |
|
||||
| - | - |
|
||||
| `amdclang++` | Clang/LLVM-based compiler that is part of `rocm-llvm` package. The source code is available at <a href="https://github.com/ROCm/llvm-project" target="_blank">https://github.com/ROCm/llvm-project</a>. |
|
||||
| AOCC | Closed-source clang-based compiler that includes additional CPU optimizations. Offered as part of ROCm via the `rocm-llvm-alt` package. See for details, <a href="https://developer.amd.com/amd-aocc/" target="_blank">https://developer.amd.com/amd-aocc/</a>. |
|
||||
| HIP-Clang | Informal term for the `amdclang++` compiler |
|
||||
| HIPIFY | Tools including `hipify-clang` and `hipify-perl`, used to automatically translate CUDA source code into portable HIP C++. The source code is available at <a href="https://github.com/ROCm/HIPIFY" target="_blank">https://github.com/ROCm/HIPIFY</a> |
|
||||
| `hipcc` | HIP compiler driver. A utility that invokes `clang` or `nvcc` depending on the target and passes the appropriate include and library options for the target compiler and HIP infrastructure. The source code is available at <a href="https://github.com/ROCm/HIPCC" target="_blank">https://github.com/ROCm/HIPCC</a>. |
|
||||
| ROCmCC | Clang/LLVM-based compiler. ROCmCC in itself is not a binary but refers to the overall compiler. |
|
||||
@@ -9,6 +9,6 @@
|
||||
|
||||
The following topics describe using specific features of the compilation tools:
|
||||
|
||||
* [Using AddressSanitizer](./using-gpu-sanitizer.md)
|
||||
* [Compiler disambiguation](./compiler-disambiguation.md)
|
||||
* [OpenMP support in ROCm](../about/compatibility/openmp.md)
|
||||
* [ROCm compiler infrastructure](https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html)
|
||||
* [Using AddressSanitizer](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html)
|
||||
* [OpenMP support](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html)
|
||||
|
||||
@@ -1,431 +0,0 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="Using the LLVM ASan on a GPU">
|
||||
<meta name="keywords" content="LLVM, ASan, address sanitizer, AddressSanitizer, instrumented
|
||||
libraries, instrumented applications, AMD, ROCm">
|
||||
</head>
|
||||
|
||||
# Using the AddressSanitizer on a GPU (beta release)
|
||||
|
||||
The LLVM AddressSanitizer (ASan) provides a process that allows developers to detect runtime addressing errors in applications and libraries. The detection is achieved using a combination of compiler-added instrumentation and runtime techniques, including function interception and replacement.
|
||||
Until now, the LLVM ASan process was only available for traditional purely CPU applications. However, ROCm has extended this mechanism to additionally allow the detection of some addressing errors on the GPU in heterogeneous applications. Ideally, developers should treat heterogeneous HIP and OpenMP applications exactly like pure CPU applications. However, this simplicity has not been achieved yet.
|
||||
This document provides documentation on using ROCm ASan.
|
||||
|
||||
For information about LLVM ASan, see the [LLVM documentation](https://clang.llvm.org/docs/AddressSanitizer.html).
|
||||
|
||||
:::{note}
|
||||
The beta release of LLVM ASan for ROCm is currently tested and validated on Ubuntu 20.04.
|
||||
:::
|
||||
|
||||
## Compiling for ASan
|
||||
|
||||
The ASan process begins by compiling the application of interest with the ASan instrumentation.
|
||||
|
||||
Recommendations for doing this are:
|
||||
|
||||
* Compile as many application and dependent library sources as possible using an AMD-built clang-based compiler such as `amdclang++`.
|
||||
* Add the following options to the existing compiler and linker options:
|
||||
|
||||
* `-fsanitize=address` - enables instrumentation
|
||||
|
||||
* `-shared-libsan` - use shared version of runtime
|
||||
|
||||
* `-g` - add debug info for improved reporting
|
||||
|
||||
* Explicitly use `xnack+` in the offload architecture option. For example, `--offload-arch=gfx90a:xnack+`
|
||||
|
||||
Other architectures are allowed, but their device code will not be instrumented and a warning will be emitted.
|
||||
|
||||
:::{tip}
|
||||
It is not an error to compile some files without ASan instrumentation, but doing so reduces the ability of the process to detect addressing errors. However, if the main program "`a.out`" does not directly depend on the ASan runtime (`libclang_rt.asan-x86_64.so`) after the build completes (check by running `ldd` (List Dynamic Dependencies) or `readelf`), the application will immediately report an error at runtime as described in the next section.
|
||||
:::
|
||||
|
||||
:::{note}
|
||||
When compiling OpenMP programs with ASan instrumentation, it is currently necessary to set the environment variable `LIBRARY_PATH` to `/opt/rocm-<version>/lib/llvm/lib/asan:/opt/rocm-<version>/lib/asan`. At runtime, it may be necessary to add `/opt/rocm-<version>/lib/llvm/lib/asan` to `LD_LIBRARY_PATH`.
|
||||
:::
|
||||
|
||||
### About compilation time
|
||||
|
||||
When `-fsanitize=address` is used, the LLVM compiler adds instrumentation code around every memory operation. This added code must be handled by all downstream components of the compiler toolchain and results in increased overall compilation time. This increase is especially evident in the AMDGPU device compiler and has in a few instances raised the compile time to an unacceptable level.
|
||||
|
||||
There are a few options if the compile time becomes unacceptable:
|
||||
|
||||
* Avoid instrumentation of the files which have the worst compile times. This will reduce the effectiveness of the ASan process.
|
||||
* Add the option `-fsanitize-recover=address` to the compiles with the worst compile times. This option simplifies the added instrumentation resulting in faster compilation. See below for more information.
|
||||
* Disable instrumentation on a per-function basis by adding `__attribute__`((no_sanitize("address"))) to functions found to be responsible for the large compile time. Again, this will reduce the effectiveness of the process.
|
||||
|
||||
## Installing ROCm GPU ASan packages
|
||||
|
||||
For a complete ROCm GPU Sanitizer installation, including packages, instrumented HSA and HIP runtimes, tools, and math libraries, use the following instruction,
|
||||
|
||||
```bash
|
||||
sudo apt-get install rocm-ml-sdk-asan
|
||||
|
||||
```
|
||||
|
||||
## Using AMD-supplied ASan instrumented libraries
|
||||
|
||||
ROCm releases have optional packages that contain additional ASan instrumented builds of the ROCm libraries (usually found in `/opt/rocm-<version>/lib`). The instrumented libraries have identical names to the regular uninstrumented libraries, and are located in `/opt/rocm-<version>/lib/asan`.
|
||||
These additional libraries are built using the `amdclang++` and `hipcc` compilers, while some uninstrumented libraries are built with `g++`. The preexisting build options are used but, as described above, additional options are used: `-fsanitize=address`, `-shared-libsan` and `-g`.
|
||||
|
||||
These additional libraries avoid additional developer effort to locate repositories, identify the correct branch, check out the correct tags, and other efforts needed to build the libraries from the source. And they extend the ability of the process to detect addressing errors into the ROCm libraries themselves.
|
||||
|
||||
When adjusting an application build to add instrumentation, linking against these instrumented libraries is unnecessary. For example, any `-L` `/opt/rocm-<version>/lib` compiler options need not be changed. However, the instrumented libraries should be used when the application is run. It is particularly important that the instrumented language runtimes, like `libamdhip64.so` and `librocm-core.so`, are used; otherwise, device invalid access detections may not be reported.
|
||||
|
||||
## Running ASan instrumented applications
|
||||
|
||||
### Preparing to run an instrumented application
|
||||
|
||||
Here are a few recommendations to consider before running an ASan instrumented heterogeneous application.
|
||||
|
||||
* Ensure the Linux kernel running on the system has Heterogeneous Memory Management (HMM) support. A kernel version of 5.6 or higher should be sufficient.
|
||||
* Ensure XNACK is enabled
|
||||
* For `gfx90a` (MI-2X0) or `gfx940` (MI-3X0) use environment `HSA_XNACK = 1`.
|
||||
* For `gfx906` (MI-50) or `gfx908` (MI-100) use environment `HSA_XNACK = 1` but also ensure the amdgpu kernel module is loaded with module argument `noretry=0`.
|
||||
This requirement is due to the fact that the XNACK setting for these GPUs is system-wide.
|
||||
|
||||
* Ensure that the application will use the instrumented libraries when it runs. The output from the shell command `ldd <application name>` can be used to see which libraries will be used.
|
||||
If the instrumented libraries are not listed by `ldd`, the environment variable `LD_LIBRARY_PATH` may need to be adjusted, or in some cases an `RPATH` compiled into the application may need to be changed and the application recompiled.
|
||||
|
||||
* Ensure that the application depends on the ASan runtime. This can be checked by running the command `readelf -d <application name> | grep NEEDED` and verifying that shared library: `libclang_rt.asan-x86_64.so` appears in the output.
|
||||
If it does not appear, when executed the application will quickly output an ASan error that looks like:
|
||||
|
||||
```bash
|
||||
==3210==ASan runtime does not come first in initial library list; you should either link runtime to your application or manually preload it with LD_PRELOAD.
|
||||
```
|
||||
|
||||
* Ensure that the application `llvm-symbolizer` can be executed, and that it is located in `/opt/rocm-<version>/llvm/bin`. This executable is not strictly required, but if found is used to translate ("symbolize") a host-side instruction address into a more useful function name, file name, and line number (assuming the application has been built to include debug information).
|
||||
|
||||
There is an environment variable, `ASAN_OPTIONS`, that can be used to adjust the runtime behavior of the ASan runtime itself. There are more than a hundred "flags" that can be adjusted (see an old list at [flags](https://github.com/google/sanitizers/wiki/AddressSanitizerFlags)) but the default settings are correct and should be used in most cases. It must be noted that these options only affect the host ASan runtime. The device runtime only currently supports the default settings for the few relevant options.
|
||||
|
||||
There are three `ASAN_OPTION` flags of note.
|
||||
|
||||
* `halt_on_error=0/1 default 1`.
|
||||
|
||||
This tells the ASan runtime to halt the application immediately after detecting and reporting an addressing error. The default makes sense because the application has entered the realm of undefined behavior. If the developer wishes to have the application continue anyway, this option can be set to zero. However, the application and libraries should then be compiled with the additional option `-fsanitize-recover=address`. Note that the ROCm optional ASan instrumented libraries are not compiled with this option and if an error is detected within one of them, but halt_on_error is set to 0, more undefined behavior will occur.
|
||||
|
||||
* `detect_leaks=0/1 default 1`.
|
||||
|
||||
This option directs the ASan runtime to enable the [Leak Sanitizer](https://clang.llvm.org/docs/LeakSanitizer.html) (LSan). For heterogeneous applications, this default results in significant output from the leak sanitizer when the application exits due to allocations made by the language runtime which are not considered to be leaks. This output can be avoided by adding `detect_leaks=0` to the `ASAN_OPTIONS`, or alternatively by producing an LSan suppression file (syntax described [here](https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer)) and activating it with environment variable `LSAN_OPTIONS=suppressions=/path/to/suppression/file`. When using a suppression file, a suppression report is printed by default. The suppression report can be disabled by using the `LSAN_OPTIONS` flag `print_suppressions=0`.
|
||||
|
||||
* `quarantine_size_mb=N default 256`
|
||||
|
||||
This option defines the number of megabytes (MB) `N` of memory that the ASan runtime will hold after it is `freed` to detect use-after-free situations. This memory is unavailable for other purposes. The default of 256 MB may be too small to detect some use-after-free situations, especially given that the large size of many GPU memory allocations may push `freed` allocations out of quarantine before the attempted use.
|
||||
|
||||
:::{note}
|
||||
Setting the value of `quarantine_size_mb` larger may enable more problematic uses to be detected, but at the cost of reducing memory available for other purposes.
|
||||
:::
|
||||
|
||||
## Runtime overhead
|
||||
|
||||
Running an ASan instrumented application incurs
|
||||
overheads which may result in unacceptably long runtimes
|
||||
or failure to run at all.
|
||||
|
||||
### Higher execution time
|
||||
|
||||
ASan detection works by checking each address at runtime
|
||||
before the address is actually accessed by a load, store, or atomic
|
||||
instruction.
|
||||
This checking involves an additional load to "shadow" memory which
|
||||
records whether the address is "poisoned" or not, and additional logic
|
||||
that decides whether to produce an detection report or not.
|
||||
|
||||
This extra runtime work can cause the application to slow down by
|
||||
a factor of three or more, depending on how many memory accesses are
|
||||
executed.
|
||||
For heterogeneous applications, the shadow memory must be accessible by all devices
|
||||
and this can mean that shadow accesses from some devices may be more costly
|
||||
than non-shadow accesses.
|
||||
|
||||
### Higher memory use
|
||||
|
||||
The address checking described above relies on the compiler to surround
|
||||
each program variable with a red zone and on ASan
|
||||
runtime to surround each runtime memory allocation with a red zone and
|
||||
fill the shadow corresponding to each red zone with poison.
|
||||
The added memory for the red zones is additional overhead on top
|
||||
of the 13% overhead for the shadow memory itself.
|
||||
|
||||
Applications which consume most one or more available memory pools when
|
||||
run normally are likely to encounter allocation failures when run with
|
||||
instrumentation.
|
||||
|
||||
## Runtime reporting
|
||||
|
||||
It is not the intention of this document to provide a detailed explanation of all the types of reports that can be output by the ASan runtime. Instead, the focus is on the differences between the standard reports for CPU issues, and reports for GPU issues.
|
||||
|
||||
An invalid address detection report for the CPU always starts with
|
||||
|
||||
```bash
|
||||
==<PID>==ERROR: AddressSanitizer: <problem type> on address <memory address> at pc <pc> bp <bp> sp <sp> <access> of size <N> at <memory address> thread T0
|
||||
```
|
||||
|
||||
and continues with a stack trace for the access, a stack trace for the allocation and deallocation, if relevant, and a dump of the shadow near the <memory address>.
|
||||
|
||||
In contrast, an invalid address detection report for the GPU always starts with
|
||||
|
||||
```bash
|
||||
==<PID>==ERROR: AddressSanitizer: <problem type> on amdgpu device <device> at pc <pc> <access> of size <n> in workgroup id (<X>,<Y>,<Z>)
|
||||
```
|
||||
|
||||
Above, `<device>` is the integer device ID, and `(<X>, <Y>, <Z>)` is the ID of the workgroup or block where the invalid address was detected.
|
||||
|
||||
While the CPU report include a call stack for the thread attempting the invalid access, the GPU is currently to a call stack of size one, i.e. the (symbolized) of the invalid access, e.g.
|
||||
|
||||
```bash
|
||||
#0 <pc> in <fuction signature> at /path/to/file.hip:<line>:<column>
|
||||
```
|
||||
|
||||
This short call stack is followed by a GPU unique section that looks like
|
||||
|
||||
```bash
|
||||
Thread ids and accessed addresses:
|
||||
<lid0> <maddr 0> : <lid1> <maddr1> : ...
|
||||
```
|
||||
|
||||
where each `<lid j> <maddr j>` indicates the lane ID and the invalid memory address held by lane `j` of the wavefront attempting the invalid access.
|
||||
|
||||
Additionally, reports for invalid GPU accesses to memory allocated by GPU code via `malloc` or new starting with, for example,
|
||||
|
||||
```bash
|
||||
==1234==ERROR: AddressSanitizer: heap-buffer-overflow on amdgpu device 0 at pc 0x7fa9f5c92dcc
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
==5678==ERROR: AddressSanitizer: heap-use-after-free on amdgpu device 3 at pc 0x7f4c10062d74
|
||||
```
|
||||
|
||||
currently may include one or two surprising CPU side tracebacks mentioning :`hostcall`". This is due to how `malloc` and `free` are implemented for GPU code and these call stacks can be ignored.
|
||||
|
||||
## Running ASan with `rocgdb`
|
||||
|
||||
`rocgdb` can be used to further investigate ASan detected errors, with some preparation.
|
||||
|
||||
Currently, the ASan runtime complains when starting `rocgdb` without preparation.
|
||||
|
||||
```bash
|
||||
$ rocgdb my_app
|
||||
==1122==ASan` runtime does not come first in initial library list; you should either link runtime to your application or manually preload it with LD_PRELOAD.
|
||||
```
|
||||
|
||||
This is solved by setting environment variable `LD_PRELOAD` to the path to the ASan runtime, whose path can be obtained using the command
|
||||
|
||||
```bash
|
||||
amdclang++ -print-file-name=libclang_rt.asan-x86_64.so
|
||||
```
|
||||
|
||||
You should also set the environment variable `HIP_ENABLE_DEFERRED_LOADING=0` before debugging HIP applications.
|
||||
|
||||
After starting `rocgdb` breakpoints can be set on the ASan runtime error reporting entry points of interest. For example, if an ASan error report includes
|
||||
|
||||
```bash
|
||||
WRITE of size 4 in workgroup id (10,0,0)
|
||||
```
|
||||
|
||||
the `rocgdb` command needed to stop the program before the report is printed is
|
||||
|
||||
```bash
|
||||
(gdb) break __asan_report_store4
|
||||
```
|
||||
|
||||
Similarly, the appropriate command for a report including
|
||||
|
||||
```bash
|
||||
READ of size <N> in workgroup ID (1,2,3)
|
||||
```
|
||||
|
||||
is
|
||||
|
||||
```bash
|
||||
(gdb) break __asan_report_load<N>
|
||||
```
|
||||
|
||||
It is possible to set breakpoints on all ASan report functions using these commands:
|
||||
|
||||
```bash
|
||||
$ rocgdb <path to application>
|
||||
(gdb) start <commmand line arguments>
|
||||
(gdb) rbreak ^__asan_report
|
||||
(gdb) c
|
||||
```
|
||||
|
||||
## Using ASan with a short HIP application
|
||||
|
||||
Consider the following simple and short demo of using the Address Sanitizer with a HIP application:
|
||||
|
||||
```C++
|
||||
|
||||
#include <cstdlib>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
__global__ void
|
||||
set1(int *p)
|
||||
{
|
||||
int i = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
p[i] = 1;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int m = std::atoi(argv[1]);
|
||||
int n1 = std::atoi(argv[2]);
|
||||
int n2 = std::atoi(argv[3]);
|
||||
int c = std::atoi(argv[4]);
|
||||
int *dp;
|
||||
hipMalloc(&dp, m*sizeof(int));
|
||||
hipLaunchKernelGGL(set1, dim3(n1), dim3(n2), 0, 0, dp);
|
||||
int *hp = (int*)malloc(c * sizeof(int));
|
||||
hipMemcpy(hp, dp, m*sizeof(int), hipMemcpyDeviceToHost);
|
||||
hipDeviceSynchronize();
|
||||
hipFree(dp);
|
||||
free(hp);
|
||||
std::puts("Done.");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
This application will attempt to access invalid addresses for certain command line arguments. In particular, if `m < n1 * n2` some device threads will attempt to access
|
||||
unallocated device memory.
|
||||
|
||||
Or, if `c < m`, the `hipMemcpy` function will copy past the end of the `malloc` allocated memory.
|
||||
|
||||
**Note**: The `hipcc` compiler is used here for simplicity.
|
||||
|
||||
Compiling without XNACK results in a warning.
|
||||
|
||||
```bash
|
||||
$ hipcc -g --offload-arch=gfx90a:xnack- -fsanitize=address -shared-libsan mini.hip -o mini
|
||||
clang++: warning: ignoring` `-fsanitize=address' option for offload arch 'gfx90a:xnack-`, as it is not currently supported there. Use it with an offload arch containing 'xnack+' instead [-Woption-ignored]`.
|
||||
```
|
||||
|
||||
The binary compiled above will run, but the GPU code will not be instrumented and the `m < n1 * n2` error will not be detected. Switching to `--offload-arch=gfx90a:xnack+` in the command above results in a warning-free compilation and an instrumented application. After setting `PATH`, `LD_LIBRARY_PATH` and `HSA_XNACK` as described earlier, a check of the binary with `ldd` yields the following,
|
||||
|
||||
```bash
|
||||
$ ldd mini
|
||||
linux-vdso.so.1 (0x00007ffd1a5ae000)
|
||||
libclang_rt.asan-x86_64.so => /opt/rocm-6.1.0-99999/llvm/lib/clang/17.0.0/lib/linux/libclang_rt.asan-x86_64.so (0x00007fb9c14b6000)
|
||||
libamdhip64.so.5 => /opt/rocm-6.1.0-99999/lib/asan/libamdhip64.so.5 (0x00007fb9bedd3000)
|
||||
libstdc++.so.6 => /lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007fb9beba8000)
|
||||
libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fb9bea59000)
|
||||
libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007fb9bea3e000)
|
||||
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fb9be84a000)
|
||||
libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007fb9be844000)
|
||||
libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007fb9be821000)
|
||||
librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007fb9be817000)
|
||||
libamd_comgr.so.2 => /opt/rocm-6.1.0-99999/lib/asan/libamd_comgr.so.2 (0x00007fb9b4382000)
|
||||
libhsa-runtime64.so.1 => /opt/rocm-6.1.0-99999/lib/asan/libhsa-runtime64.so.1 (0x00007fb9b3b00000)
|
||||
libnuma.so.1 => /lib/x86_64-linux-gnu/libnuma.so.1 (0x00007fb9b3af3000)
|
||||
/lib64/ld-linux-x86-64.so.2 (0x00007fb9c2027000)
|
||||
libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007fb9b3ad7000)
|
||||
libtinfo.so.6 => /lib/x86_64-linux-gnu/libtinfo.so.6 (0x00007fb9b3aa7000)
|
||||
libelf.so.1 => /lib/x86_64-linux-gnu/libelf.so.1 (0x00007fb9b3a89000)
|
||||
libdrm.so.2 => /opt/amdgpu/lib/x86_64-linux-gnu/libdrm.so.2 (0x00007fb9b3a70000)
|
||||
libdrm_amdgpu.so.1 => /opt/amdgpu/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1 (0x00007fb9b3a62000)
|
||||
|
||||
```
|
||||
|
||||
This confirms that the address sanitizer runtime is linked in, and the ASan instrumented version of the runtime libraries are used.
|
||||
Checking the `PATH` yields
|
||||
|
||||
```bash
|
||||
$ which llvm-symbolizer
|
||||
/opt/rocm-6.1.0-99999/llvm/bin/llvm-symbolizer
|
||||
```
|
||||
|
||||
Lastly, a check of the OS kernel version yields
|
||||
|
||||
```bash
|
||||
$ uname -rv
|
||||
5.15.0-73-generic #80~20.04.1-Ubuntu SMP Wed May 17 14:58:14 UTC 2023
|
||||
```
|
||||
|
||||
which indicates that the required HMM support (kernel version > 5.6) is available. This completes the necessary setup. Running with `m = 100`, `n1 = 11`, `n2 = 10` and `c = 100` should produce
|
||||
a report for an invalid access by the last 10 threads.
|
||||
|
||||
```bash
|
||||
=================================================================
|
||||
==3141==ERROR: AddressSanitizer: heap-buffer-overflow on amdgpu device 0 at pc 0x7fb1410d2cc4
|
||||
WRITE of size 4 in workgroup id (10,0,0)
|
||||
#0 0x7fb1410d2cc4 in set1(int*) at /home/dave/mini/mini.cpp:0:10
|
||||
|
||||
Thread ids and accessed addresses:
|
||||
00 : 0x7fb14371d190 01 : 0x7fb14371d194 02 : 0x7fb14371d198 03 : 0x7fb14371d19c 04 : 0x7fb14371d1a0 05 : 0x7fb14371d1a4 06 : 0x7fb14371d1a8 07 : 0x7fb14371d1ac
|
||||
08 : 0x7fb14371d1b0 09 : 0x7fb14371d1b4
|
||||
|
||||
0x7fb14371d190 is located 0 bytes after 400-byte region [0x7fb14371d000,0x7fb14371d190)
|
||||
allocated by thread T0 here:
|
||||
#0 0x7fb151c76828 in hsa_amd_memory_pool_allocate /work/dave/git/compute/external/llvm-project/compiler-rt/lib/asan/asan_interceptors.cpp:692:3
|
||||
#1 ...
|
||||
|
||||
#12 0x7fb14fb99ec4 in hipMalloc /work/dave/git/compute/external/clr/hipamd/src/hip_memory.cpp:568:3
|
||||
#13 0x226630 in hipError_t hipMalloc<int>(int**, unsigned long) /opt/rocm-6.1.0-99999/include/hip/hip_runtime_api.h:8367:12
|
||||
#14 0x226630 in main /home/dave/mini/mini.cpp:19:5
|
||||
#15 0x7fb14ef02082 in __libc_start_main /build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16
|
||||
|
||||
Shadow bytes around the buggy address:
|
||||
0x7fb14371cf00: ...
|
||||
|
||||
=>0x7fb14371d180: 00 00[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa
|
||||
0x7fb14371d200: ...
|
||||
|
||||
Shadow byte legend (one shadow byte represents 8 application bytes):
|
||||
Addressable: 00
|
||||
Partially addressable: 01 02 03 04 05 06 07
|
||||
Heap left redzone: fa
|
||||
...
|
||||
==3141==ABORTING
|
||||
```
|
||||
|
||||
Running with `m = 100`, `n1 = 10`, `n2 = 10` and `c = 99` should produce a report for an invalid copy.
|
||||
|
||||
```shell
|
||||
=================================================================
|
||||
==2817==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x514000150dcc at pc 0x7f5509551aca bp 0x7ffc90a7ae50 sp 0x7ffc90a7a610
|
||||
WRITE of size 400 at 0x514000150dcc thread T0
|
||||
#0 0x7f5509551ac9 in __asan_memcpy /work/dave/git/compute/external/llvm-project/compiler-rt/lib/asan/asan_interceptors_memintrinsics.cpp:61:3
|
||||
#1 ...
|
||||
|
||||
#9 0x7f5507462a28 in hipMemcpy_common(void*, void const*, unsigned long, hipMemcpyKind, ihipStream_t*) /work/dave/git/compute/external/clr/hipamd/src/hip_memory.cpp:637:10
|
||||
#10 0x7f5507464205 in hipMemcpy /work/dave/git/compute/external/clr/hipamd/src/hip_memory.cpp:642:3
|
||||
#11 0x226844 in main /home/dave/mini/mini.cpp:22:5
|
||||
#12 0x7f55067c3082 in __libc_start_main /build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16
|
||||
#13 0x22605d in _start (/home/dave/mini/mini+0x22605d)
|
||||
|
||||
0x514000150dcc is located 0 bytes after 396-byte region [0x514000150c40,0x514000150dcc)
|
||||
allocated by thread T0 here:
|
||||
#0 0x7f5509553dcf in malloc /work/dave/git/compute/external/llvm-project/compiler-rt/lib/asan/asan_malloc_linux.cpp:69:3
|
||||
#1 0x226817 in main /home/dave/mini/mini.cpp:21:21
|
||||
#2 0x7f55067c3082 in __libc_start_main /build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16
|
||||
|
||||
SUMMARY: AddressSanitizer: heap-buffer-overflow /work/dave/git/compute/external/llvm-project/compiler-rt/lib/asan/asan_interceptors_memintrinsics.cpp:61:3 in __asan_memcpy
|
||||
Shadow bytes around the buggy address:
|
||||
0x514000150b00: ...
|
||||
|
||||
=>0x514000150d80: 00 00 00 00 00 00 00 00 00[04]fa fa fa fa fa fa
|
||||
0x514000150e00: ...
|
||||
|
||||
Shadow byte legend (one shadow byte represents 8 application bytes):
|
||||
Addressable: 00
|
||||
Partially addressable: 01 02 03 04 05 06 07
|
||||
Heap left redzone: fa
|
||||
...
|
||||
==2817==ABORTING
|
||||
```
|
||||
|
||||
## Known issues with using GPU sanitizer
|
||||
|
||||
* Red zones must have limited size. It is possible for an invalid access to completely miss a red zone and not be detected.
|
||||
|
||||
* Lack of detection or false reports can be caused by the runtime not properly maintaining red zone shadows.
|
||||
|
||||
* Lack of detection on the GPU might also be due to the implementation not instrumenting accesses to all GPU specific address spaces. For example, in the current implementation accesses to "private" or "stack" variables on the GPU are not instrumented, and accesses to HIP shared variables (also known as "local data store" or "LDS") are also not instrumented.
|
||||
|
||||
* It can also be the case that a memory fault is reported for an invalid address even with the instrumentation. This is usually caused by the invalid address being so wild that its shadow address is outside any memory region, and the fault actually occurs on the access to the shadow address. It is also possible to hit a memory fault for the `NULL` pointer. While address 0 does have a shadow location, it is not poisoned by the runtime.
|
||||
|
||||
* There is currently a bug which can result in memory faults being reported when running instrumented device code which makes use of `malloc`, `free`, `new`, or `delete`.
|
||||
|
||||
* There is currently a bug which can result in undefined symbols being reported at compile time when instrumented device code makes use of `new` and `delete`.
|
||||
26
docs/conf.py
@@ -4,11 +4,14 @@
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
import os
|
||||
import shutil
|
||||
|
||||
# Keep capitalization due to similar linking on GitHub's markdown preview.
|
||||
shutil.copy2("../RELEASE.md", "./about/release-notes.md")
|
||||
|
||||
os.system("mkdir -p ../_readthedocs/html/downloads")
|
||||
os.system("cp data/reference/compatibility-matrix-historical-6.0.csv ../_readthedocs/html/downloads/compatibility-matrix-historical-6.0.csv")
|
||||
|
||||
latex_engine = "xelatex"
|
||||
latex_elements = {
|
||||
"fontpkg": r"""
|
||||
@@ -18,20 +21,25 @@ latex_elements = {
|
||||
"""
|
||||
}
|
||||
|
||||
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm.docs.amd.com")
|
||||
html_context = {}
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
# configurations for PDF output by Read the Docs
|
||||
project = "ROCm Documentation"
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.1.5"
|
||||
release = "6.1.5"
|
||||
copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.2.0"
|
||||
release = "6.2.0"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-03-04"},
|
||||
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
{"file": "about/release-notes", "os": ["linux", "windows"], "date": "2024-08-02"},
|
||||
{"file": "about/changelog", "os": ["linux", "windows"], "date": "2024-08-02"},
|
||||
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
|
||||
@@ -74,6 +82,7 @@ article_pages = [
|
||||
"file": "how-to/llm-fine-tuning-optimization/profiling-and-debugging",
|
||||
"os": ["linux"],
|
||||
},
|
||||
{"file": "how-to/performance-validation/mi300x/vllm-benchmark", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/index", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi200", "os": ["linux"]},
|
||||
@@ -92,6 +101,11 @@ extensions = ["rocm_docs", "sphinx_reredirects"]
|
||||
|
||||
external_projects_current_project = "rocm"
|
||||
|
||||
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm-stg.amd.com")
|
||||
html_context = {}
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
html_theme = "rocm_docs_theme"
|
||||
html_theme_options = {"flavor": "rocm-docs-home"}
|
||||
|
||||
|
||||
@@ -56,6 +56,10 @@ To make edits to our documentation via PR, follow these steps:
|
||||
6. Change directory into the `./docs` folder and make any documentation changes locally using your preferred code editor. Follow the guidelines listed on the
|
||||
[documentation structure](./doc-structure.md) page.
|
||||
|
||||
```{note}
|
||||
Spell checking is performed for pull requests by {doc}`ROCm Docs Core<rocm-docs-core:index>`. To ensure your PR passes spell checking you might need at add new words or acronyms to the `.wordlist.txt` file as described in {doc}`Spell Check<rocm-docs-core:user_guide/spellcheck>`.
|
||||
```
|
||||
|
||||
7. Optionally run a local test build of the documentation to ensure the content builds and looks as expected. In your terminal, run the following commands from within the `./docs` folder of your cloned repository:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -6,26 +6,10 @@
|
||||
|
||||
# Providing feedback
|
||||
|
||||
There are four standard ways to provide feedback on this repository.
|
||||
Your feedback is welcome. You can provide feedback either through GitHub Discussions or GitHub Issues.
|
||||
|
||||
## Pull request
|
||||
Use [GitHub Discussions](https://github.com/ROCm/ROCm/discussions) to ask questions, view announcements, and communicate with other members of the community.
|
||||
|
||||
All contributions to ROCm documentation should arrive via the
|
||||
[GitHub Flow](https://docs.github.com/en/get-started/quickstart/github-flow)
|
||||
targeting the develop branch of the repository.
|
||||
Use [GitHub Issues](https://github.com/ROCm/ROCm/issues) to submit issues you find with ROCm or with the ROCm documentation.
|
||||
|
||||
For more in-depth information on creating a pull request (PR), see
|
||||
[Contributing](./contributing.md).
|
||||
|
||||
## GitHub discussions
|
||||
|
||||
To ask questions or view answers to frequently asked questions, refer to
|
||||
[GitHub Discussions](https://github.com/ROCm/ROCm/discussions).
|
||||
On GitHub Discussions, in addition to asking and answering questions,
|
||||
members can share updates, have open-ended conversations,
|
||||
and follow along on via public announcements.
|
||||
|
||||
## GitHub issue
|
||||
|
||||
Issues on existing or absent documentation can be filed in
|
||||
[GitHub Issues](https://github.com/ROCm/ROCm/issues).
|
||||
For information about contributing to the ROCm repository and creating a pull request (PR), see [Contributing](./contributing.md).
|
||||
|
||||
|
After Width: | Height: | Size: 28 KiB |
|
After Width: | Height: | Size: 54 KiB |
BIN
docs/data/how-to/tuning-guides/mi300a-rocm-smi-output.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
docs/data/how-to/tuning-guides/mi300a-rocm-smi-showhw-output.png
Normal file
|
After Width: | Height: | Size: 103 KiB |
|
After Width: | Height: | Size: 113 KiB |
BIN
docs/data/reference/banner-compilers.jpg
Normal file
|
After Width: | Height: | Size: 61 KiB |
BIN
docs/data/reference/banner-runtimes.jpg
Normal file
|
After Width: | Height: | Size: 73 KiB |
114
docs/data/reference/compatibility-matrix-historical-6.0.csv
Normal file
@@ -0,0 +1,114 @@
|
||||
ROCm Version,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`,Ubuntu 24.04,,,,,,
|
||||
,"Ubuntu 22.04.5 [#Ubuntu220405-past-60]_, 22.04.4","Ubuntu 22.04.5 [#Ubuntu220405-past-60]_,22.04.4, 22.04.3","Ubuntu 22.04.5 [#Ubuntu220405-past-60]_,22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <rocm-install-on-linux:install/3rd-party/jax-install>`,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,
|
||||
Thrust,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
:doc:`rocAL <rocal:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipFORT <hipfort:index>`,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
`Tensile <https://github.com/ROCm/Tensile>`_,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.0.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.0,rocm-6.1.5,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0
|
||||
,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
|
Before Width: | Height: | Size: 250 KiB |
BIN
docs/data/rocm-software-stack-6_2_0.jpg
Normal file
|
After Width: | Height: | Size: 288 KiB |
BIN
docs/data/unused-images/banner-optimization.jpg
Normal file
|
After Width: | Height: | Size: 59 KiB |
23
docs/how-to/build-rocm.rst
Normal file
@@ -0,0 +1,23 @@
|
||||
.. meta::
|
||||
:description: Build ROCm from source
|
||||
:keywords: build ROCm, source, ROCm source, ROCm, repo, make, makefile
|
||||
|
||||
|
||||
.. _building-rocm:
|
||||
|
||||
*************************************************************
|
||||
Build ROCm from source
|
||||
*************************************************************
|
||||
|
||||
ROCm is an open-source stack from which you can build from source code. The source code is available from `<https://github.com/ROCm/ROCm>`__.
|
||||
|
||||
|
||||
The general steps to build ROCm are:
|
||||
|
||||
#. Clone the ROCm source code
|
||||
#. Prepare the build environment
|
||||
#. Run the build command
|
||||
|
||||
Because the ROCm stack is constantly evolving, the most current instructions are stored with the source code in GitHub.
|
||||
For detailed build instructions, see `Build ROCm from source <https://github.com/ROCm/ROCm?tab=readme-ov-file#build-rocm-from-source>`_
|
||||
|
||||
@@ -13,9 +13,9 @@ frameworks to ensure that framework-specific optimizations take advantage of AMD
|
||||
|
||||
The following guides cover installation processes for ROCm-aware deep learning frameworks.
|
||||
|
||||
* :doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
|
||||
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
|
||||
* :doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
|
||||
* :doc:`PyTorch for ROCm <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
|
||||
* :doc:`JAX for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
|
||||
The following chart steps through typical installation workflows for installing deep learning frameworks for ROCm.
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ This section discusses how to implement `vLLM <https://docs.vllm.ai/en/latest>`_
|
||||
vLLM inference
|
||||
==============
|
||||
|
||||
vLLM is renowned for its paged attention algorithm that can reduce memory consumption and increase throughput thanks to
|
||||
vLLM is renowned for its PagedAttention algorithm that can reduce memory consumption and increase throughput thanks to
|
||||
its paging scheme. Instead of allocating GPU high-bandwidth memory (HBM) for the maximum output token lengths of the
|
||||
models, the paged attention of vLLM allocates GPU HBM dynamically for its actual decoding lengths. This paged attention
|
||||
is also effective when multiple requests share the same key and value contents for a large value of beam search or
|
||||
@@ -137,6 +137,10 @@ Installing vLLM
|
||||
|
||||
Refer to :ref:`mi300x-vllm-optimization` for performance optimization tips.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
.. _fine-tuning-llms-tgi:
|
||||
|
||||
Hugging Face TGI
|
||||
|
||||
@@ -250,4 +250,4 @@ page describes the options.
|
||||
:align: center
|
||||
|
||||
Learn more about optimizing kernels with TunableOp in
|
||||
:ref:`Optimizing Triton kernels <fine-tuning-llms-triton-tunableop>`.
|
||||
:ref:`Optimizing Triton kernels <mi300x-tunableop>`.
|
||||
|
||||
@@ -189,6 +189,9 @@ Installing bitsandbytes
|
||||
# Use -DBNB_ROCM_ARCH to specify target GPU arch
|
||||
cmake -DBNB_ROCM_ARCH="gfx942" -DCOMPUTE_BACKEND=hip -S .
|
||||
|
||||
# Compile the project
|
||||
make
|
||||
|
||||
# Install
|
||||
python setup.py install
|
||||
|
||||
@@ -219,12 +222,11 @@ To get started with bitsandbytes primitives, use the following code as reference
|
||||
Using bitsandbytes with Hugging Face Transformers
|
||||
-------------------------------------------------
|
||||
|
||||
To load a Transformers model in 4-bit, set ``load_int_4bt=true`` in ``BitsAndBytesConfig``.
|
||||
To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndBytesConfig``.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM
|
||||
from bitsandbytes import BitsAndBytesConfig
|
||||
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
||||
@@ -240,8 +242,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from bitsandbytes import BitsAndBytesConfig
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
|
||||
|
||||
@@ -37,14 +37,14 @@ Setting up the base implementation environment
|
||||
----------------------------------------------
|
||||
|
||||
#. Install PyTorch for ROCm. Refer to the
|
||||
:doc:`PyTorch installation guide <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`. For consistent
|
||||
:doc:`PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`. For consistent
|
||||
installation, it’s recommended to use official ROCm prebuilt Docker images with the framework pre-installed.
|
||||
|
||||
#. In the Docker container, check the availability of ROCM-capable accelerators using the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
rocm-smi -showproductname
|
||||
rocm-smi --showproductname
|
||||
|
||||
#. Check that your accelerators are available to PyTorch.
|
||||
|
||||
@@ -95,7 +95,7 @@ Now, it's important to adjust how you load the model. Add the ``device_map`` par
|
||||
# Load base model to GPU memory
|
||||
base_model = AutoModelForCausalLM.from_pretrained(
|
||||
base_model_name,
|
||||
device_map = "auto"
|
||||
device_map = "auto",
|
||||
trust_remote_code = True)
|
||||
...
|
||||
# Run training
|
||||
|
||||
@@ -38,7 +38,7 @@ Setting up the base implementation environment
|
||||
----------------------------------------------
|
||||
|
||||
#. Install PyTorch for ROCm. Refer to the
|
||||
:doc:`PyTorch installation guide <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`. For a consistent
|
||||
:doc:`PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`. For a consistent
|
||||
installation, it’s recommended to use official ROCm prebuilt Docker images with the framework pre-installed.
|
||||
|
||||
#. In the Docker container, check the availability of ROCm-capable accelerators using the following command.
|
||||
@@ -103,7 +103,7 @@ Setting up the base implementation environment
|
||||
pip install peft
|
||||
|
||||
# Install the other dependencies.
|
||||
pip install transformers, datasets, huggingface-hub, scipy
|
||||
pip install transformers datasets huggingface-hub scipy
|
||||
|
||||
#. Check that the required packages can be imported.
|
||||
|
||||
|
||||
334
docs/how-to/performance-validation/mi300x/vllm-benchmark.rst
Normal file
@@ -0,0 +1,334 @@
|
||||
.. meta::
|
||||
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the unified
|
||||
ROCm Docker image.
|
||||
:keywords: model, MAD, automation, dashboarding, validate
|
||||
|
||||
***********************************************************
|
||||
LLM inference performance validation on AMD Instinct MI300X
|
||||
***********************************************************
|
||||
|
||||
.. _vllm-benchmark-unified-docker:
|
||||
|
||||
The `ROCm vLLM Docker <https://hub.docker.com/r/rocm/vllm/tags>`_ image offers
|
||||
a prebuilt, optimized environment designed for validating large language model
|
||||
(LLM) inference performance on the AMD Instinct™ MI300X accelerator. This
|
||||
ROCm vLLM Docker image integrates vLLM and PyTorch tailored specifically for the
|
||||
MI300X accelerator and includes the following components:
|
||||
|
||||
* `ROCm 6.2.0 <https://github.com/ROCm/ROCm>`_
|
||||
|
||||
* `vLLM 0.4.3 <https://docs.vllm.ai/en/latest>`_
|
||||
|
||||
* `PyTorch 2.4.0 <https://github.com/pytorch/pytorch>`_
|
||||
|
||||
* Tuning files (in CSV format)
|
||||
|
||||
With this Docker image, you can quickly validate the expected inference
|
||||
performance numbers on the MI300X accelerator. This topic also provides tips on
|
||||
optimizing performance with popular AI models.
|
||||
|
||||
.. _vllm-benchmark-vllm:
|
||||
|
||||
.. note::
|
||||
|
||||
vLLM is a toolkit and library for LLM inference and
|
||||
serving. It deploys the PagedAttention algorithm, which reduces memory
|
||||
consumption and increases throughput by leveraging dynamic key and value
|
||||
allocation in GPU memory. vLLM also incorporates many LLM acceleration
|
||||
and quantization algorithms. In addition, AMD implements high-performance
|
||||
custom kernels and modules in vLLM to enhance performance further. See
|
||||
:ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for more
|
||||
information.
|
||||
|
||||
Getting started
|
||||
===============
|
||||
|
||||
Use the following procedures to reproduce the benchmark results on an
|
||||
MI300X accelerator with the prebuilt vLLM Docker image.
|
||||
|
||||
.. _vllm-benchmark-get-started:
|
||||
|
||||
1. Disable NUMA auto-balancing.
|
||||
|
||||
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
|
||||
might hang until the periodic balancing is finalized. For more information,
|
||||
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# disable automatic NUMA balancing
|
||||
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
|
||||
# check if NUMA balancing is disabled (returns 0 if disabled)
|
||||
cat /proc/sys/kernel/numa_balancing
|
||||
0
|
||||
|
||||
2. Download the :ref:`ROCm vLLM Docker image <vllm-benchmark-unified-docker>`.
|
||||
|
||||
Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
|
||||
|
||||
Once setup is complete, you can choose between two options to reproduce the
|
||||
benchmark results:
|
||||
|
||||
- :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>`
|
||||
|
||||
- :ref:`Standalone benchmarking <vllm-benchmark-standalone>`
|
||||
|
||||
.. _vllm-benchmark-mad:
|
||||
|
||||
MAD-integrated benchmarking
|
||||
===========================
|
||||
|
||||
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
Use this command to run a performance benchmark test of the Llama 3.1 8B model
|
||||
on one GPU with ``float16`` data type in the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
|
||||
|
||||
ROCm MAD launches a Docker container with the name
|
||||
``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the
|
||||
model are collected in the following path: ``~/MAD/reports_float16/``
|
||||
|
||||
Although the following eight models are pre-configured to collect latency and
|
||||
throughput performance data, users can also change the benchmarking parameters.
|
||||
Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section.
|
||||
|
||||
Available models
|
||||
----------------
|
||||
|
||||
.. hlist::
|
||||
:columns: 3
|
||||
|
||||
* ``pyt_vllm_llama-3.1-8b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-70b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-405b``
|
||||
|
||||
* ``pyt_vllm_llama-2-7b``
|
||||
|
||||
* ``pyt_vllm_mistral-7b``
|
||||
|
||||
* ``pyt_vllm_qwen2-7b``
|
||||
|
||||
* ``pyt_vllm_jais-13b``
|
||||
|
||||
* ``pyt_vllm_jais-30b``
|
||||
|
||||
.. _vllm-benchmark-standalone:
|
||||
|
||||
Standalone benchmarking
|
||||
=======================
|
||||
|
||||
You can run the vLLM benchmark tool independently by starting the
|
||||
:ref:`Docker container <vllm-benchmark-get-started>` as shown in the following
|
||||
snippet.
|
||||
|
||||
.. code-block::
|
||||
|
||||
docker pull rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
|
||||
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name unified_docker_vllm rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
|
||||
|
||||
In the Docker container, clone the ROCm MAD repository and navigate to the
|
||||
benchmark scripts directory at ``~/MAD/scripts/vllm``.
|
||||
|
||||
.. code-block::
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/vllm
|
||||
|
||||
Multiprocessing distributed executor
|
||||
--------------------------------------
|
||||
|
||||
To optimize vLLM performance, add the multiprocessing API server argument ``--distributed-executor-backend mp``.
|
||||
|
||||
Command
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To start the benchmark, use the following command with the appropriate options.
|
||||
See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
|
||||
options and their descriptions.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype
|
||||
|
||||
See the :ref:`examples <vllm-benchmark-run-benchmark>` for more information.
|
||||
|
||||
.. note::
|
||||
|
||||
The input sequence length, output sequence length, and tensor parallel (TP) are
|
||||
already configured. You don't need to specify them with this script.
|
||||
|
||||
.. note::
|
||||
|
||||
If you encounter the following error, pass your access-authorized Hugging
|
||||
Face token to the gated models.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
OSError: You are trying to access a gated repo.
|
||||
|
||||
# pass your HF_TOKEN
|
||||
export HF_TOKEN=$your_personal_hf_token
|
||||
|
||||
.. _vllm-benchmark-standalone-options:
|
||||
|
||||
Options
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
* - ``$test_option``
|
||||
- latency
|
||||
- Measure decoding token latency
|
||||
|
||||
* -
|
||||
- throughput
|
||||
- Measure token generation throughput
|
||||
|
||||
* -
|
||||
- all
|
||||
- Measure both throughput and latency
|
||||
|
||||
* - ``$model_repo``
|
||||
- ``meta-llama/Meta-Llama-3.1-8B-Instruct``
|
||||
- Llama 3.1 8B
|
||||
|
||||
* - (``float16``)
|
||||
- ``meta-llama/Meta-Llama-3.1-70B-Instruct``
|
||||
- Llama 3.1 70B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Meta-Llama-3.1-405B-Instruct``
|
||||
- Llama 3.1 405B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Llama-2-7b-chat-hf``
|
||||
- Llama 2 7B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mixtral-8x7B-Instruct-v0.1``
|
||||
- Mixtral 8x7B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mixtral-8x22B-Instruct-v0.1``
|
||||
- Mixtral 8x22B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mistral-7B-Instruct-v0.3``
|
||||
- Mixtral 7B
|
||||
|
||||
* -
|
||||
- ``Qwen/Qwen2-7B-Instruct``
|
||||
- Qwen2 7B
|
||||
|
||||
* -
|
||||
- ``core42/jais-13b-chat``
|
||||
- JAIS 13B
|
||||
|
||||
* -
|
||||
- ``core42/jais-30b-chat-v3``
|
||||
- JAIS 30B
|
||||
|
||||
* - ``$num_gpu``
|
||||
- 1 or 8
|
||||
- Number of GPUs
|
||||
|
||||
* - ``$datatype``
|
||||
- ``float16``
|
||||
- Data type
|
||||
|
||||
.. _vllm-benchmark-run-benchmark:
|
||||
|
||||
Running the benchmark on the MI300X accelerator
|
||||
-----------------------------------------------
|
||||
|
||||
Here are some examples of running the benchmark with various options.
|
||||
See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
|
||||
options and their descriptions.
|
||||
|
||||
Latency benchmark example
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to benchmark the latency of the Llama 3.1 8B model on one GPU with the ``float16`` data type.
|
||||
|
||||
.. code-block::
|
||||
|
||||
./vllm_benchmark_report.sh -s latency -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
|
||||
|
||||
Find the latency report at:
|
||||
|
||||
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_latency_report.csv``
|
||||
|
||||
Throughput benchmark example
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to benchmark the throughput of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./vllm_benchmark_report.sh -s throughput -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
|
||||
|
||||
Find the throughput reports at:
|
||||
|
||||
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_throughput_report.csv``
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<style>
|
||||
mjx-container[jax="CHTML"][display="true"] {
|
||||
text-align: left;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
.. note::
|
||||
|
||||
Throughput is calculated as:
|
||||
|
||||
- .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
|
||||
|
||||
- .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- For application performance optimization strategies for HPC and AI workloads,
|
||||
including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
|
||||
|
||||
- To learn more about the options for latency and throughput benchmark scripts,
|
||||
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
|
||||
|
||||
- To learn how to run LLM models from Hugging Face or your own model, see
|
||||
:doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
|
||||
|
||||
- To learn how to optimize inference on LLMs, see
|
||||
:doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
|
||||
|
||||
- For a list of other ready-made Docker images for ROCm, see the
|
||||
:doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.
|
||||
|
||||
@@ -41,6 +41,13 @@ vLLM walkthrough
|
||||
Refer to this developer blog for guidance on serving with vLLM `Inferencing and serving with vLLM on AMD GPUs — ROCm
|
||||
Blogs <https://rocm.blogs.amd.com/artificial-intelligence/vllm/README.html>`_
|
||||
|
||||
Validating vLLM performance
|
||||
---------------------------
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
.. _rocm-for-ai-serve-hugging-face-tgi:
|
||||
|
||||
Serving using Hugging Face TGI
|
||||
|
||||
@@ -16,31 +16,31 @@ Before getting started, install ROCm and supported machine learning frameworks.
|
||||
|
||||
Each release of ROCm supports specific hardware and software configurations. Before installing, consult the
|
||||
:doc:`System requirements <rocm-install-on-linux:reference/system-requirements>` and
|
||||
:doc:`Installation prerequisites <rocm-install-on-linux:how-to/prerequisites>` guides.
|
||||
:doc:`Installation prerequisites <rocm-install-on-linux:install/prerequisites>` guides.
|
||||
|
||||
If you’re new to ROCm, refer to the :doc:`ROCm quick start install guide for Linux
|
||||
<rocm-install-on-linux:tutorial/quick-start>`.
|
||||
<rocm-install-on-linux:install/quick-start>`.
|
||||
|
||||
If you’re using a Radeon GPU for graphics-accelerated applications, refer to the
|
||||
:doc:`Radeon installation instructions <radeon:docs/install/install-radeon>`.
|
||||
`Radeon installation instructions <https://rocm.docs.amd.com/projects/radeon/en/docs-6.1.3/docs/install/native_linux/install-radeon.html>`_.
|
||||
|
||||
ROCm supports two methods for installation. There is no difference in the final ROCm installation between these two
|
||||
methods. You can also opt for :ref:`single-version or multi-version installation
|
||||
<rocm-install-on-linux:installation-types>`.
|
||||
ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install/install-overview>`:
|
||||
|
||||
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:how-to/native-install/index>`
|
||||
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/native-install/index>`
|
||||
|
||||
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:how-to/amdgpu-install>`
|
||||
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`
|
||||
|
||||
* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`.
|
||||
|
||||
.. grid:: 1
|
||||
|
||||
.. grid-item-card:: Post-install
|
||||
|
||||
Follow the :doc:`post-installation instructions <rocm-install-on-linux:how-to/native-install/post-install>` to
|
||||
Follow the :doc:`post-installation instructions <rocm-install-on-linux:install/post-install>` to
|
||||
configure your system linker, PATH, and verify the installation.
|
||||
|
||||
If you encounter any issues during installation, refer to the
|
||||
:doc:`Installation troubleshooting <rocm-install-on-linux:how-to/native-install/install-faq>` guide.
|
||||
:doc:`Installation troubleshooting <rocm-install-on-linux:reference/install-faq>` guide.
|
||||
|
||||
Machine learning frameworks
|
||||
===========================
|
||||
@@ -53,8 +53,10 @@ ROCm supports popular machine learning frameworks and libraries including `PyTor
|
||||
Review the framework installation documentation. For ease-of-use, it's recommended to use official ROCm prebuilt Docker
|
||||
images with the framework pre-installed.
|
||||
|
||||
* :doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
|
||||
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
|
||||
* :doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
|
||||
* :doc:`PyTorch for ROCm <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
|
||||
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
|
||||
|
||||
* :doc:`JAX for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
|
||||
The sections that follow in :doc:`Training a model <train-a-model>` are geared for a ROCm with PyTorch installation.
|
||||
|
||||
@@ -65,6 +65,12 @@ their own performance testing for additional tuning.
|
||||
|
||||
- `CDNA 3 architecture <https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf>`_
|
||||
|
||||
* - :doc:`AMD Instinct MI300A <mi300a>`
|
||||
|
||||
- `AMD Instinct MI300 instruction set architecture <https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf>`_
|
||||
|
||||
- `CDNA 3 architecture <https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf>`_
|
||||
|
||||
* - :doc:`AMD Instinct MI200 <mi200>`
|
||||
|
||||
- `AMD Instinct MI200 instruction set architecture <https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf>`_
|
||||
|
||||
@@ -365,9 +365,9 @@ installed.
|
||||
## System management
|
||||
|
||||
For a complete guide on how to install/manage/uninstall ROCm on Linux, refer to
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:tutorial/quick-start>`. To verify that the installation was
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:install/quick-start>`. To verify that the installation was
|
||||
successful, refer to the
|
||||
{doc}`post-install instructions<rocm-install-on-linux:how-to/native-install/post-install>` and
|
||||
{doc}`post-install instructions<rocm-install-on-linux:install/post-install>` and
|
||||
[system tools](../../reference/rocm-tools.md). Should verification
|
||||
fail, consult the [System Debugging Guide](../system-debugging.md).
|
||||
|
||||
|
||||
@@ -350,9 +350,9 @@ installed.
|
||||
## System management
|
||||
|
||||
For a complete guide on how to install/manage/uninstall ROCm on Linux, refer to
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:tutorial/quick-start>`. For verifying that the
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:install/quick-start>`. For verifying that the
|
||||
installation was successful, refer to the
|
||||
{doc}`post-install instructions<rocm-install-on-linux:how-to/native-install/post-install>` and
|
||||
{doc}`post-install instructions<rocm-install-on-linux:install/post-install>` and
|
||||
[system tools](../../reference/rocm-tools.md). Should verification
|
||||
fail, consult the [System Debugging Guide](../system-debugging.md).
|
||||
|
||||
|
||||
452
docs/how-to/system-optimization/mi300a.rst
Normal file
@@ -0,0 +1,452 @@
|
||||
.. meta::
|
||||
:description: AMD Instinct MI300A system settings
|
||||
:keywords: AMD, Instinct, MI300A, HPC, tuning, BIOS settings, NBIO, ROCm,
|
||||
environment variable, performance, accelerator, GPU, EPYC, GRUB,
|
||||
operating system
|
||||
|
||||
***************************************************
|
||||
AMD Instinct MI300A system optimization
|
||||
***************************************************
|
||||
|
||||
This topic discusses the operating system settings and system management commands for
|
||||
the AMD Instinct MI300A accelerator. This topic can help you optimize performance.
|
||||
|
||||
System settings
|
||||
========================================
|
||||
|
||||
This section reviews the system settings required to configure a MI300A SOC system and
|
||||
optimize its performance.
|
||||
|
||||
The MI300A system-on-a-chip (SOC) design requires you to review and potentially adjust your OS configuration as explained in
|
||||
the :ref:`operating-system-settings-label` section. These settings are critical for
|
||||
performance because the OS on an accelerated processing unit (APU) is responsible for memory management across the CPU and GPU accelerators.
|
||||
In the APU memory model, system settings are available to limit GPU memory allocation.
|
||||
This limit is important because legacy software often determines the
|
||||
amount of allowable memory at start-up time
|
||||
by probing discrete memory until it is exhausted. If left unchecked, this practice
|
||||
can starve the OS of resources.
|
||||
|
||||
System BIOS settings
|
||||
-----------------------------------
|
||||
|
||||
System BIOS settings are preconfigured for optimal performance from the
|
||||
platform vendor. This means that you do not need to adjust these settings
|
||||
when using MI300A. If you have any questions regarding these settings,
|
||||
contact your MI300A platform vendor.
|
||||
|
||||
GRUB settings
|
||||
-----------------------------------
|
||||
|
||||
The ``/etc/default/grub`` file is used to configure the GRUB bootloader on modern Linux distributions.
|
||||
Linux uses the string assigned to ``GRUB_CMDLINE_LINUX`` in this file as
|
||||
its command line parameters during boot.
|
||||
|
||||
Appending strings using the Linux command line
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
It is recommended that you append the following string to ``GRUB_CMDLINE_LINUX``.
|
||||
|
||||
``pci=realloc=off``
|
||||
This setting disables the automatic reallocation
|
||||
of PCI resources, so Linux is able to unambiguously detect all GPUs on the
|
||||
MI300A-based system. It's used when Single Root I/O Virtualization (SR-IOV) Base
|
||||
Address Registers (BARs) have not been allocated by the BIOS. This can help
|
||||
avoid potential issues with certain hardware configurations.
|
||||
|
||||
Validating the IOMMU setting
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
IOMMU is a system-specific IO mapping mechanism for DMA mapping
|
||||
and isolation. IOMMU is turned off by default in the operating system settings
|
||||
for optimal performance.
|
||||
|
||||
To verify IOMMU is turned off, first install the ``acpica-tools`` package using your
|
||||
package manager.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
sudo apt install acpica-tools
|
||||
|
||||
Then confirm that the following commands do not return any results.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
sudo acpidump | grep IVRS
|
||||
sudo acpidump | grep DMAR
|
||||
|
||||
Update GRUB
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to update GRUB to use the modified configuration:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
sudo grub2-mkconfig -o /boot/grub2/grub.cfg
|
||||
|
||||
On some Red Hat-based systems, the ``grub2-mkconfig`` command might not be available. In this case,
|
||||
use ``grub-mkconfig`` instead. Verify that you have the
|
||||
correct version by using the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
grub-mkconfig -version
|
||||
|
||||
.. _operating-system-settings-label:
|
||||
|
||||
Operating system settings
|
||||
-----------------------------------
|
||||
|
||||
The operating system provides several options to customize and tune performance. For more information
|
||||
about supported operating systems, see the :doc:`Compatibility matrix <../../compatibility/compatibility-matrix>`.
|
||||
|
||||
If you are using a distribution other than RHEL or SLES, the latest Linux kernel is recommended.
|
||||
Performance considerations for the Zen4, which is the core architecture in the MI300A,
|
||||
require a Linux kernel running version 5.18 or higher.
|
||||
|
||||
This section describes performance-based settings.
|
||||
|
||||
* **Enable transparent huge pages**
|
||||
|
||||
To enable transparent huge pages, use one of the following methods:
|
||||
|
||||
* From the command line, run the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
echo always > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
|
||||
* Set the Linux kernel parameter ``transparent_hugepage`` as follows in the
|
||||
relevant ``.cfg`` file for your system.
|
||||
|
||||
.. code-block:: cfg
|
||||
|
||||
transparent_hugepage=always
|
||||
|
||||
* **Increase the amount of allocatable memory**
|
||||
|
||||
By default, when using a device allocator via HIP, it is only possible to allocate 96 GiB out of
|
||||
a possible 128 GiB of memory on the MI300A. This limitation does not affect host allocations.
|
||||
To increase the available system memory, load the ``amdttm`` module with new values for
|
||||
``pages_limit`` and ``page_pool_size``. These numbers correspond to the number of 4 KiB pages of memory.
|
||||
To make 128 GiB of memory available across all four devices, for a total amount of 512 GiB,
|
||||
set ``pages_limit`` and ``page_pool_size`` to ``134217728``. For a two-socket system, divide these values
|
||||
by two. After setting these values, reload the AMDGPU driver.
|
||||
|
||||
First, review the current settings using this shell command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
cat /sys/module/amdttm/parameters/pages_limit
|
||||
|
||||
To set the amount of allocatable memory to all available memory on all four APU devices, run these commands:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
sudo modprobe amdttm pages_limit=134217728 page_pool_size=134217728
|
||||
sudo modprobe amdgpu
|
||||
|
||||
These settings can also be hardcoded in the ``/etc/modprobe.d/amdttm.conf`` file or specified as boot
|
||||
parameters.
|
||||
|
||||
To use the hardcoded method,
|
||||
the filesystem must already be set up when the kernel driver is loaded.
|
||||
To hardcode the settings, add the following lines to ``/etc/modprobe.d/amdttm.conf``:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
options amdttm pages_limit=134217728
|
||||
options amdttm page_pool_size=134217728
|
||||
|
||||
If the filesystem is not already set up when the kernel driver is loaded, then the options
|
||||
must be specified as boot parameters. To specify the settings
|
||||
as boot parameters when loading the kernel, use this example as a guideline:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
vmlinux-[...] amdttm.pages_limit=134217728 amdttm.page_pool_size=134217728 [...]
|
||||
|
||||
To verify the new settings and confirm the change, use this command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
cat /sys/module/amdttm/parameters/pages_limit
|
||||
|
||||
.. note::
|
||||
|
||||
The system settings for ``pages_limit`` and ``page_pool_size`` are calculated by multiplying the
|
||||
per-APU limit of 4 KiB pages, which is ``33554432``, by the number of APUs on the node. The limit for a system with
|
||||
two APUs ``33554432 x 2`` or ``67108864``.
|
||||
This means the ``modprobe`` command for two APUs is ``sudo modprobe amdttm pages_limit=67108864 page_pool_size=67108864``.
|
||||
|
||||
* **Limit the maximum and single memory allocations on the GPU**
|
||||
|
||||
Many AI-related applications were originally developed on discrete GPUs. Some of these applications
|
||||
have fixed problem sizes associated with the targeted GPU size, and some attempt to determine the
|
||||
system memory limits by allocating chunks until failure. These techniques can cause issues in an
|
||||
APU with a shared space.
|
||||
|
||||
To allow these applications to run on the APU without further changes,
|
||||
ROCm supports a default memory policy that restricts the percentage of the GPU that can be allocated.
|
||||
The following environment variables control this feature:
|
||||
|
||||
* ``GPU_MAX_ALLOC_PERCENT``
|
||||
* ``GPU_SINGLE_ALLOC_PERCENT``
|
||||
|
||||
These settings can be added to the default shell environment or the user environment. The effect of the memory allocation
|
||||
settings varies depending on the system, configuration, and task. They might require adjustment, especially when performing GPU benchmarks. Setting these values to ``100``
|
||||
lets the GPU allocate any amount of free memory. However, the risk of encountering
|
||||
an operating system out-of-memory (OMM) condition increases when almost
|
||||
all the available memory is used.
|
||||
|
||||
Before setting either of these items to 100 percent,
|
||||
carefully consider the expected CPU workload allocation and the anticipated OS usage.
|
||||
For instance, if the OS requires 8GB on a 128GB system, setting these
|
||||
variables to ``100`` authorizes a single
|
||||
workload to allocate up to 120GB of memory. Unless the system has swap space configured
|
||||
any over-allocation attempts will be handled by the OMM policies.
|
||||
|
||||
* **Disable NUMA (Non-uniform memory access) balancing**
|
||||
|
||||
ROCm uses information from the compiled application to ensure an affinity exists
|
||||
between the GPU agent processes and their CPU hosts or co-processing agents.
|
||||
Because the APU has OS threads,
|
||||
including threads with memory management, the default kernel NUMA policies can
|
||||
adversely impact workload performance without additional tuning.
|
||||
|
||||
.. note::
|
||||
|
||||
At the kernel level, ``pci_relloc`` can also be set to ``off`` as an additional tuning measure.
|
||||
|
||||
To disable NUMA balancing, use one of the following methods:
|
||||
|
||||
* From the command line, run the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
echo 0 > /proc/sys/kernel/numa_balancing
|
||||
|
||||
* Set the following Linux kernel parameters in the
|
||||
relevant ``.cfg`` file for your system.
|
||||
|
||||
.. code-block:: cfg
|
||||
|
||||
pci=realloc=off numa_balancing=disable
|
||||
|
||||
* **Enable compaction**
|
||||
|
||||
Compaction is necessary for proper MI300A operation because the APU dynamically shares memory
|
||||
between the CPU and GPU. Compaction can be done proactively, which reduces
|
||||
allocation costs, or performed during allocation, in which case it is part of the background activities.
|
||||
Without compaction, the MI300A application performance eventually degrades as fragmentation increases.
|
||||
In RHEL distributions, compaction is disabled by default. In Ubuntu, it's enabled by default.
|
||||
|
||||
To enable compaction, enter the following commands using the command line:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
echo 20 > /proc/sys/vm/compaction_proactiveness
|
||||
echo 1 > /proc/sys/vm/compact_unevictable_allowed
|
||||
|
||||
.. _mi300a-processor-affinity:
|
||||
|
||||
* **Change affinity of ROCm helper threads**
|
||||
|
||||
Changing the affinity prevents internal ROCm threads from having their CPU core affinity mask
|
||||
set to all CPU cores available. With this setting, the threads inherit their parent's
|
||||
CPU core affinity mask. Before adjusting this setting, ensure you thoroughly understand
|
||||
your system topology and how the application, runtime environment, and batch system
|
||||
set the thread-to-core affinity. If you have any questions regarding this setting,
|
||||
contact your MI300A platform vendor or the AMD support team.
|
||||
To enable this setting, enter the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=0
|
||||
|
||||
* **CPU core states and C-states**
|
||||
|
||||
The system BIOS handles these settings for the MI300A.
|
||||
They don't need to be configured on the operating system.
|
||||
|
||||
System management
|
||||
========================================
|
||||
|
||||
For a complete guide on installing, managing, and uninstalling ROCm on Linux, see
|
||||
:doc:`Quick-start (Linux)<rocm-install-on-linux:install/quick-start>`. To verify that the
|
||||
installation was successful, see the
|
||||
:doc:`Post-installation instructions<rocm-install-on-linux:install/post-install>` and
|
||||
:doc:`ROCm tools <../../reference/rocm-tools>` guides. If verification
|
||||
fails, consult the :doc:`System debugging guide <../system-debugging>`.
|
||||
|
||||
.. _hw-verification-rocm-label:
|
||||
|
||||
Hardware verification with ROCm
|
||||
-----------------------------------
|
||||
|
||||
ROCm includes tools to query the system structure. To query
|
||||
the GPU hardware, use the ``rocm-smi`` command.
|
||||
|
||||
``rocm-smi`` reports statistics per socket, so the power results combine CPU and GPU utilization.
|
||||
In an idle state on a multi-socket system, some power imbalances are expected because
|
||||
the distribution of OS threads can keep some APU devices at higher power states.
|
||||
|
||||
.. note::
|
||||
|
||||
The MI300A VRAM settings show as ``N/A``.
|
||||
|
||||
.. image:: ../../data/how-to/tuning-guides/mi300a-rocm-smi-output.png
|
||||
:alt: Output from the rocm-smi command
|
||||
|
||||
The ``rocm-smi --showhw`` command shows the available system
|
||||
GPUs and their device ID and firmware details.
|
||||
|
||||
In the MI300A hardware settings, the system BIOS handles the UMC RAS. The
|
||||
ROCm-supplied GPU driver does not manage this setting.
|
||||
This results in a value of ``DISABLED`` for the ``UMC RAS`` setting.
|
||||
|
||||
.. image:: ../../data/how-to/tuning-guides/mi300a-rocm-smi-showhw-output.png
|
||||
:alt: Output from the ``rocm-smi showhw`` command
|
||||
|
||||
To see the system structure, the localization of the GPUs in the system, and the
|
||||
fabric connections between the system components, use the ``rocm-smi --showtopo`` command.
|
||||
|
||||
* The first block of the output shows the distance between the GPUs. The weight is a qualitative
|
||||
measure of the “distance” data must travel to reach one GPU from another.
|
||||
While the values do not have a precise physical meaning, the higher the value the
|
||||
more hops are required to reach the destination from the source GPU.
|
||||
* The second block contains a matrix named “Hops between two GPUs”, where ``1`` means
|
||||
the two GPUs are directly connected with XGMI, ``2`` means both GPUs are linked to the
|
||||
same CPU socket and GPU communications go through the CPU, and ``3`` means
|
||||
both GPUs are linked to different CPU sockets so communications go
|
||||
through both CPU sockets.
|
||||
* The third block indicates the link types between the GPUs. This can either be
|
||||
``XGMI`` for AMD Infinity Fabric links or ``PCIE`` for PCIe Gen4 links.
|
||||
* The fourth block reveals the localization of a GPU with respect to the NUMA organization
|
||||
of the shared memory of the AMD EPYC processors.
|
||||
|
||||
.. image:: ../../data/how-to/tuning-guides/mi300a-rocm-smi-showtopo-output.png
|
||||
:alt: Output from the ``rocm-smi showtopo`` command
|
||||
|
||||
Testing inter-device bandwidth
|
||||
-----------------------------------
|
||||
|
||||
The ``rocm-smi --showtopo`` command from the :ref:`hw-verification-rocm-label` section
|
||||
displays the system structure and shows how the GPUs are located and connected within this
|
||||
structure. For more information, use the :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`, which can run benchmarks to
|
||||
show the effective link bandwidth between the system components.
|
||||
|
||||
For information on how to install the ROCm Bandwidth Test, see :doc:`Building the environment <rocm_bandwidth_test:install/install>`.
|
||||
|
||||
The output lists the available compute devices (CPUs and GPUs), including
|
||||
their device ID and PCIe ID:
|
||||
|
||||
.. image:: ../../data/how-to/tuning-guides/mi300a-rocm-bandwidth-test-output.png
|
||||
:alt: Output from the rocm-bandwidth-test utility
|
||||
|
||||
It also displays the measured bandwidth for unidirectional and
|
||||
bidirectional transfers between the devices on the CPU and GPU:
|
||||
|
||||
.. image:: ../../data/how-to/tuning-guides/mi300a-rocm-peak-bandwidth-output.png
|
||||
:alt: Bandwidth information from the rocm-bandwidth-test utility
|
||||
|
||||
Abbreviations
|
||||
=============
|
||||
|
||||
APBDIS
|
||||
Algorithmic Performance Boost Disable
|
||||
|
||||
APU
|
||||
Accelerated processing unit
|
||||
|
||||
BAR
|
||||
Base Address Register
|
||||
|
||||
BIOS
|
||||
Basic Input/Output System
|
||||
|
||||
CBS
|
||||
Common BIOS Settings
|
||||
|
||||
CCD
|
||||
Compute Core Die
|
||||
|
||||
CDNA
|
||||
Compute DNA
|
||||
|
||||
CLI
|
||||
Command Line Interface
|
||||
|
||||
CPU
|
||||
Central Processing Unit
|
||||
|
||||
cTDP
|
||||
Configurable Thermal Design Power
|
||||
|
||||
DF
|
||||
Data Fabric
|
||||
|
||||
DMA
|
||||
Direct Memory Access
|
||||
|
||||
GPU
|
||||
Graphics Processing Unit
|
||||
|
||||
GRUB
|
||||
Grand Unified Bootloader
|
||||
|
||||
HBM
|
||||
High Bandwidth Memory
|
||||
|
||||
HPC
|
||||
High Performance Computing
|
||||
|
||||
IOMMU
|
||||
Input-Output Memory Management Unit
|
||||
|
||||
ISA
|
||||
Instruction Set Architecture
|
||||
|
||||
NBIO
|
||||
North Bridge Input/Output
|
||||
|
||||
NUMA
|
||||
Non-Uniform Memory Access
|
||||
|
||||
OMM
|
||||
Out of Memory
|
||||
|
||||
PCI
|
||||
Peripheral Component Interconnect
|
||||
|
||||
PCIe
|
||||
PCI Express
|
||||
|
||||
POR
|
||||
Power-On Reset
|
||||
|
||||
RAS
|
||||
Reliability, availability and serviceability
|
||||
|
||||
SMI
|
||||
System Management Interface
|
||||
|
||||
SMT
|
||||
Simultaneous Multi-threading
|
||||
|
||||
SOC
|
||||
System On Chip
|
||||
|
||||
SR-IOV
|
||||
Single Root I/O Virtualization
|
||||
|
||||
TSME
|
||||
Transparent Secure Memory Encryption
|
||||
|
||||
UMC
|
||||
Unified Memory Controller
|
||||
|
||||
VRAM
|
||||
Video RAM
|
||||
|
||||
xGMI
|
||||
Inter-chip Global Memory Interconnect
|
||||
@@ -473,6 +473,18 @@ It is recommended to set the following environment variable:
|
||||
|
||||
This is the default option as of ROCm 6.2.
|
||||
|
||||
Change affinity of ROCm helper threads
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This change prevents internal ROCm threads from having their CPU core affinity mask
|
||||
set to all CPU cores available. With this setting, the threads inherit their parent's
|
||||
CPU core affinity mask. If you have any questions regarding this setting,
|
||||
contact your MI300A platform vendor. To enable this setting, enter the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=0
|
||||
|
||||
IOMMU configuration -- systems with 256 CPU threads
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
@@ -520,9 +532,9 @@ can provide system-level information, offering valuable insights for
|
||||
optimizing user applications.
|
||||
|
||||
For a complete guide on how to install, manage, or uninstall ROCm on Linux, refer to
|
||||
:doc:`rocm-install-on-linux:tutorial/quick-start`. For verifying that the
|
||||
:doc:`rocm-install-on-linux:install/quick-start`. For verifying that the
|
||||
installation was successful, refer to the
|
||||
:doc:`rocm-install-on-linux:how-to/native-install/post-install`.
|
||||
:doc:`rocm-install-on-linux:install/post-install`.
|
||||
Should verification fail, consult :doc:`/how-to/system-debugging`.
|
||||
|
||||
Hardware verification with ROCm
|
||||
@@ -695,8 +707,8 @@ Bidirectional bandwidth
|
||||
|
||||
Bidirectional bandwidth
|
||||
|
||||
Acronyms
|
||||
========
|
||||
Abbreviations
|
||||
=============
|
||||
|
||||
AMI
|
||||
American Megatrends International
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
This chapter reviews system settings that are required to configure the system
|
||||
for ROCm virtualization on RDNA2-based AMD Radeon™ PRO GPUs. Installing ROCm on
|
||||
Bare Metal follows the routine ROCm
|
||||
{doc}`installation procedure<rocm-install-on-linux:how-to/native-install/index>`.
|
||||
{doc}`installation procedure<rocm-install-on-linux:install/native-install/index>`.
|
||||
|
||||
To enable ROCm virtualization on V620, one has to setup Single Root I/O
|
||||
Virtualization (SR-IOV) in the BIOS via setting found in the following
|
||||
@@ -166,4 +166,4 @@ First, assign GPU virtual function (VF) to VM using the following steps.
|
||||
Then start the VM.
|
||||
|
||||
Finally install ROCm on the virtual machine (VM). For detailed instructions,
|
||||
refer to the {doc}`Linux install guide<rocm-install-on-linux:how-to/native-install/index>`.
|
||||
refer to the {doc}`Linux install guide<rocm-install-on-linux:install/native-install/index>`.
|
||||
|
||||
@@ -8,6 +8,8 @@ accelerators. They include detailed instructions on system settings and
|
||||
application tuning suggestions to help you fully leverage the capabilities of
|
||||
these accelerators, thereby achieving optimal performance.
|
||||
|
||||
* :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`
|
||||
|
||||
* :doc:`/how-to/tuning-guides/mi300x/system`
|
||||
|
||||
* :doc:`/how-to/tuning-guides/mi300x/workload`
|
||||
|
||||
@@ -150,6 +150,10 @@ the workload to validate improvements and ensure that the changes have had the
|
||||
desired effect. Continuous iteration helps refine the performance gains and
|
||||
address any new bottlenecks that may emerge.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image that has everything required to implement
|
||||
the tips in this section. It includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
.. _mi300x-profiling-tools:
|
||||
|
||||
Profiling tools
|
||||
@@ -162,12 +166,12 @@ tools available depending on their specific profiling needs.
|
||||
|
||||
* ROCProfiler tool collects kernel execution performance
|
||||
metrics. For more information, see the
|
||||
`ROCProfiler <https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprofv1.html>`_
|
||||
:doc:`ROCProfiler <rocprofiler:index>`
|
||||
documentation.
|
||||
|
||||
* Omniperf builds upon ROCProfiler but provides more guided analysis.
|
||||
For more information, see
|
||||
`Omniperf documentation <https://rocm.github.io/omniperf/>`_.
|
||||
:doc:`Omniperf documentation <omniperf:index>`.
|
||||
|
||||
Refer to :doc:`/how-to/llm-fine-tuning-optimization/profiling-and-debugging`
|
||||
to explore commonly used profiling tools and their usage patterns.
|
||||
@@ -372,6 +376,11 @@ Refer to `vLLM documentation <https://docs.vllm.ai/en/latest/models/performance.
|
||||
for additional performance tips. :ref:`fine-tuning-llms-vllm` describes vLLM
|
||||
usage with ROCm.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance
|
||||
of LLM inference with vLLM on the MI300X accelerator. The Docker image includes
|
||||
ROCm, vLLM, PyTorch, and tuning files in the CSV format. For more information,
|
||||
see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
Maximize throughput
|
||||
-------------------
|
||||
|
||||
|
||||
106
docs/index.md
@@ -5,81 +5,45 @@
|
||||
reference, ROCm, AMD">
|
||||
</head>
|
||||
|
||||
# AMD ROCm™ documentation
|
||||
# AMD ROCm documentation
|
||||
|
||||
Welcome to the ROCm docs home page! If you're new to ROCm, you can review the following
|
||||
resources to learn more about our products and what we support:
|
||||
ROCm is an open-source software platform optimized to extract HPC and AI workload
|
||||
performance from AMD Instinct accelerators and AMD Radeon GPUs while maintaining
|
||||
compatibility with industry software frameworks. For more information, see [What is ROCm?](./what-is-rocm.rst)
|
||||
|
||||
* [What is ROCm?](./what-is-rocm.rst)
|
||||
* [Release notes](./about/release-notes.md)
|
||||
If you're using Radeon GPUs, consider reviewing {doc}`Radeon-specific ROCm documentation<radeon:index>`.
|
||||
|
||||
You can install ROCm on our Radeon™, Radeon™ PRO, and Instinct™ GPUs. If you're using Radeon
|
||||
GPUs, we recommend reading the
|
||||
{doc}`Radeon-specific ROCm documentation<radeon:index>`.
|
||||
Installation instructions are available from:
|
||||
|
||||
For hands-on applications, refer to our [ROCm blogs](https://rocm.blogs.amd.com/) site.
|
||||
* {doc}`ROCm installation for Linux<rocm-install-on-linux:index>`
|
||||
* {doc}`HIP SDK installation for Windows<rocm-install-on-windows:index>`
|
||||
* [Deep learning frameworks installation](./how-to/deep-learning-rocm.rst)
|
||||
* [Build ROCm from source](./how-to/build-rocm.rst)
|
||||
|
||||
Our documentation is organized into the following categories:
|
||||
ROCm documentation is organized into the following categories:
|
||||
|
||||
::::{grid} 1 2 2 2
|
||||
:class-container: rocm-doc-grid
|
||||
|
||||
:::{grid-item-card}
|
||||
:img-top: ./data/banner-installation.jpg
|
||||
:img-alt: Install documentation
|
||||
:padding: 2
|
||||
|
||||
* Linux
|
||||
* {doc}`Linux install guide<rocm-install-on-linux:how-to/native-install/index>`
|
||||
* {doc}`Package manager integration<rocm-install-on-linux:how-to/native-install/package-manager-integration>`
|
||||
* {doc}`Install Docker containers<rocm-install-on-linux:how-to/docker>`
|
||||
* {doc}`ROCm & Spack<rocm-install-on-linux:how-to/spack>`
|
||||
* Windows
|
||||
* {doc}`Windows install guide<rocm-install-on-windows:how-to/install>`
|
||||
* {doc}`Application deployment guidelines<rocm-install-on-windows:conceptual/deployment-guidelines>`
|
||||
* [Deep learning frameworks](./how-to/deep-learning-rocm.rst)
|
||||
* {doc}`PyTorch for ROCm<rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
|
||||
* {doc}`TensorFlow for ROCm<rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
|
||||
* {doc}`JAX for ROCm<rocm-install-on-linux:how-to/3rd-party/jax-install>`
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ./data/banner-compatibility.jpg
|
||||
:img-alt: Compatibility information
|
||||
:padding: 2
|
||||
|
||||
* [Compatibility matrix](./compatibility/compatibility-matrix.rst)
|
||||
* {doc}`System requirements (Linux)<rocm-install-on-linux:reference/system-requirements>`
|
||||
* {doc}`System requirements (Windows)<rocm-install-on-windows:reference/system-requirements>`
|
||||
* {doc}`Linux system requirements<rocm-install-on-linux:reference/system-requirements>`
|
||||
* {doc}`Windows system requirements<rocm-install-on-windows:reference/system-requirements>`
|
||||
* {doc}`Third-party support<rocm-install-on-linux:reference/3rd-party-support-matrix>`
|
||||
* {doc}`User/kernel space<rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`
|
||||
* {doc}`Docker<rocm-install-on-linux:reference/docker-image-support-matrix>`
|
||||
* [OpenMP](./about/compatibility/openmp.md)
|
||||
* {doc}`OpenMP<llvm-project:conceptual/openmp>`
|
||||
* [Precision support](./compatibility/precision-support.rst)
|
||||
* {doc}`ROCm on Radeon GPUs<radeon:index>`
|
||||
:::
|
||||
|
||||
<!-- markdownlint-disable MD051 -->
|
||||
:::{grid-item-card}
|
||||
:img-top: ./data/banner-reference.jpg
|
||||
:img-alt: Reference documentation
|
||||
:padding: 2
|
||||
|
||||
* [API libraries](./reference/api-libraries.md)
|
||||
* [Artificial intelligence](#artificial-intelligence-apis)
|
||||
* [C++ primitives](#cpp-primitives)
|
||||
* [Communication](#communication-libraries)
|
||||
* [Math](#math-apis)
|
||||
* [Random number generators](#random-number-apis)
|
||||
* [HIP runtime](#hip-runtime)
|
||||
* [Tools](./reference/rocm-tools.md)
|
||||
* [Development](#development-tools)
|
||||
* [Performance analysis](#performance-tools)
|
||||
* [System](#system-tools)
|
||||
* [Hardware specifications](./reference/gpu-arch-specs.rst)
|
||||
:::
|
||||
<!-- markdownlint-enable MD051 -->
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ./data/banner-howto.jpg
|
||||
:img-alt: How-to documentation
|
||||
:padding: 2
|
||||
@@ -88,32 +52,21 @@ Our documentation is organized into the following categories:
|
||||
* [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
|
||||
* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
|
||||
* [System optimization](./how-to/system-optimization/index.rst)
|
||||
* [AMD Instinct MI300X](./how-to/system-optimization/mi300x.rst)
|
||||
* [AMD Instinct MI200](./how-to/system-optimization/mi200.md)
|
||||
* [AMD Instinct MI100](./how-to/system-optimization/mi100.md)
|
||||
* [AMD Instinct RDNA2](./how-to/system-optimization/w6000-v620.md)
|
||||
* [AMD Instinct MI300X tuning guides](./how-to/tuning-guides/mi300x/index.rst)
|
||||
* [System tuning](./how-to/tuning-guides/mi300x/system.rst)
|
||||
* [Workload tuning](./how-to/tuning-guides/mi300x/workload.rst)
|
||||
* [AMD Instinct MI300X performance validation and tuning](./how-to/tuning-guides/mi300x/index.rst)
|
||||
* [System debugging](./how-to/system-debugging.md)
|
||||
* [GPU-enabled MPI](./how-to/gpu-enabled-mpi.rst)
|
||||
* [Using compiler features](./conceptual/compiler-topics.md)
|
||||
* [Using AddressSanitizer](./conceptual/using-gpu-sanitizer.md)
|
||||
* [Compiler disambiguation](./conceptual/compiler-disambiguation.md)
|
||||
* [OpenMP support in ROCm](./about/compatibility/openmp.md)
|
||||
* [Using advanced compiler features](./conceptual/compiler-topics.md)
|
||||
* [Setting the number of CUs](./how-to/setting-cus)
|
||||
* [GitHub examples](https://github.com/amd/rocm-examples)
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ./data/banner-conceptual.jpg
|
||||
:img-alt: Conceptual documentation
|
||||
:padding: 2
|
||||
|
||||
* [GPU architecture](./conceptual/gpu-arch.md)
|
||||
* [MI100](./conceptual/gpu-arch/mi100.md)
|
||||
* [MI250](./conceptual/gpu-arch/mi250.md)
|
||||
* [MI300](./conceptual/gpu-arch/mi300.md)
|
||||
* [GPU memory](./conceptual/gpu-memory.md)
|
||||
* [File structure (Linux FHS)](./conceptual/file-reorg.md)
|
||||
* [GPU isolation techniques](./conceptual/gpu-isolation.md)
|
||||
@@ -123,4 +76,23 @@ Our documentation is organized into the following categories:
|
||||
* [Inference optimization with MIGraphX](./conceptual/ai-migraphx-optimization.md)
|
||||
:::
|
||||
|
||||
<!-- markdownlint-disable MD051 -->
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ./data/banner-reference.jpg
|
||||
:img-alt: Reference documentation
|
||||
:padding: 2
|
||||
|
||||
* [Libraries](./reference/api-libraries.md)
|
||||
* [Artificial intelligence](#artificial-intelligence-apis)
|
||||
* [C++ primitives](#cpp-primitives)
|
||||
* [Communication](#communication-libraries)
|
||||
* [Math](#math-apis)
|
||||
* [Random number generators](#random-number-apis)
|
||||
* [HIP runtime](#hip-runtime)
|
||||
* [ROCm tools and compilers](./reference/rocm-tools.md)
|
||||
* [GPU hardware specifications](./reference/gpu-arch-specs.rst)
|
||||
:::
|
||||
<!-- markdownlint-enable MD051 -->
|
||||
|
||||
::::
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
algebra, AMD">
|
||||
</head>
|
||||
|
||||
# ROCm API libraries
|
||||
# ROCm libraries
|
||||
|
||||
::::{grid} 1 2 2 2
|
||||
:class-container: rocm-doc-grid
|
||||
@@ -25,6 +25,7 @@
|
||||
* {doc}`MIVisionX <mivisionx:index>`
|
||||
* {doc}`rocAL <rocal:index>`
|
||||
* {doc}`rocDecode <rocdecode:index>`
|
||||
* {doc}`rocPyDecode <rocpydecode:index>`
|
||||
* {doc}`ROCm Performance Primitives (RPP) <rpp:index>`
|
||||
:::
|
||||
|
||||
@@ -53,18 +54,6 @@
|
||||
* {doc}`RCCL <rccl:index>`
|
||||
:::
|
||||
|
||||
(hip-runtime)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-hip.jpg
|
||||
:img-alt: HIP APIs
|
||||
:padding: 2
|
||||
|
||||
* {doc}`HIP runtime <hip:index>`
|
||||
* {doc}`HIPIFY <hipify:index>`
|
||||
:::
|
||||
|
||||
(math-apis)=
|
||||
|
||||
:::{grid-item-card}
|
||||
@@ -78,6 +67,7 @@
|
||||
* {doc}`hipBLASLt <hipblaslt:index>`
|
||||
* {doc}`hipFFT <hipfft:index>` / {doc}`rocFFT <rocfft:index>`
|
||||
* {doc}`hipfort <hipfort:index>`
|
||||
* {doc}`hipRAND <hiprand:index>` / {doc}`rocRAND <rocrand:index>`
|
||||
* {doc}`hipSOLVER <hipsolver:index>` / {doc}`rocSOLVER <rocsolver:index>`
|
||||
* {doc}`hipSPARSE <hipsparse:index>` / {doc}`rocSPARSE <rocsparse:index>`
|
||||
* {doc}`hipSPARSELt <hipsparselt:index>`
|
||||
@@ -86,16 +76,4 @@
|
||||
* [Tensile](https://github.com/ROCm/Tensile)
|
||||
:::
|
||||
|
||||
(random-number-apis)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-random-number.jpg
|
||||
:img-alt: Random number APIs
|
||||
:padding: 2
|
||||
|
||||
* {doc}`hipRAND <hiprand:index>`
|
||||
* {doc}`rocRAND <rocrand:index>`
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
@@ -6,40 +6,11 @@
|
||||
algebra, AMD">
|
||||
</head>
|
||||
|
||||
# ROCm tools
|
||||
# ROCm tools, compilers, and runtimes
|
||||
|
||||
::::{grid} 1 2 2 2
|
||||
:class-container: rocm-doc-grid
|
||||
|
||||
(development-tools)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-development.jpg
|
||||
:img-alt: Development tools
|
||||
:padding: 2
|
||||
|
||||
* {doc}`HIPIFY <hipify:index>`
|
||||
* {doc}`ROCdbgapi <rocdbgapi:index>`
|
||||
* [ROCmCC](./rocmcc.md)
|
||||
* {doc}`ROCm Debugger (ROCgdb) <rocgdb:index>`
|
||||
* {doc}`ROCr Debug Agent <rocr_debug_agent:index>`
|
||||
:::
|
||||
|
||||
(performance-tools)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-performance.jpg
|
||||
:img-alt: Performance tools
|
||||
:padding: 2
|
||||
|
||||
* {doc}`ROCm Bandwidth Test <rocm_bandwidth_test:index>`
|
||||
* {doc}`ROCProfiler <rocprofiler:profiler_home_page>`
|
||||
* [rocprofiler-register](https://github.com/ROCm/rocprofiler-register)
|
||||
* {doc}`ROCTracer <roctracer:index>`
|
||||
:::
|
||||
|
||||
(system-tools)=
|
||||
|
||||
:::{grid-item-card}
|
||||
@@ -49,10 +20,67 @@
|
||||
:padding: 2
|
||||
|
||||
* {doc}`AMD SMI <amdsmi:index>`
|
||||
* {doc}`rocminfo <rocminfo:index>`
|
||||
* {doc}`ROCm Data Center Tool <rdc:index>`
|
||||
* {doc}`rocminfo <rocminfo:index>`
|
||||
* {doc}`ROCm SMI <rocm_smi_lib:index>`
|
||||
* {doc}`ROCm Validation Suite <rocmvalidationsuite:index>`
|
||||
:::
|
||||
|
||||
(performance-tools)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-performance.jpg
|
||||
:img-alt: Performance tools
|
||||
:padding: 2
|
||||
|
||||
* {doc}`Omniperf <omniperf:index>`
|
||||
* {doc}`Omnitrace <omnitrace:index>`
|
||||
* {doc}`ROCm Bandwidth Test <rocm_bandwidth_test:index>`
|
||||
* {doc}`ROCProfiler <rocprofiler:index>`
|
||||
* {doc}`ROCprofiler-SDK <rocprofiler-sdk:index>`
|
||||
* {doc}`ROCTracer <roctracer:index>`
|
||||
:::
|
||||
|
||||
(development-tools)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-development.jpg
|
||||
:img-alt: Development tools
|
||||
:padding: 2
|
||||
|
||||
* {doc}`ROCm CMake <rocmcmakebuildtools:index>`
|
||||
* {doc}`HIPIFY <hipify:index>`
|
||||
* {doc}`ROCdbgapi <rocdbgapi:index>`
|
||||
* {doc}`ROCm Debugger (ROCgdb) <rocgdb:index>`
|
||||
* {doc}`ROCr Debug Agent <rocr_debug_agent:index>`
|
||||
:::
|
||||
|
||||
(compilers)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-compilers.jpg
|
||||
:img-alt: Compilers
|
||||
:padding: 2
|
||||
|
||||
* {doc}`ROCm Compilers <llvm-project:index>`
|
||||
* {doc}`HIPCC <hipcc:index>`
|
||||
* [FLANG](https://github.com/ROCm/flang/)
|
||||
:::
|
||||
|
||||
(runtimes)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-runtimes.jpg
|
||||
:img-alt: Runtimes
|
||||
:padding: 2
|
||||
|
||||
* {doc}`AMD Common Language Runtime (CLR) <hip:understand/amd_clr>`
|
||||
* {doc}`HIP <hip:index>`
|
||||
* {doc}`ROCR-Runtime <rocr-runtime:index>`
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||